def test_import(sfinit, modules):
    """
    Test code by importing all available classes for this module. If any of
    these fails then the module itself has some code error (e.g., syntax
    errors, inheritance errors).
    """
    sfinit
    for package, module_list in modules.items():
        for module in module_list:
            config.custom_import(MODULE, module)()
def test_setup(sfinit, modules):
    """
    Test the expected behavior of each of the required functions.

    Setup: make sure that setup creates the necessary directory structure

    :param sfinit: fixture that initializes an empty SeisFlows3 working state
    :param modules: dict of packages and the module lists they contain
    :return:
    """
    # NOTE: this test is currently disabled by the early return statement
    return
    sf = sfinit
    PATH = sys.modules["seisflows_paths"]
    SETUP_CREATES = [PATH.SCRATCH, PATH.SYSTEM, PATH.OUTPUT]
    for package, module_list in modules.items():
        for module in module_list:
            loaded_module = config.custom_import(MODULE, module)()
            # Make sure these don't already exist
            for path_ in SETUP_CREATES:
                assert(not os.path.exists(path_))
            loaded_module.setup()
            # Check that the minimum required directories were created
            for path_ in SETUP_CREATES:
                assert(os.path.exists(path_))
            # Remove created paths so we can check the next module
            for path_ in SETUP_CREATES:
                if os.path.isdir(path_):
                    shutil.rmtree(path_)
                else:
                    os.remove(path_)
class Subclass(custom_import("MODULE NAME HERE", "PARENT CLASS NAME HERE")):
    """
    This is a template subclass
    """
    # Class-specific logger accessed using self.logger
    # When this logger is called, e.g., self.logger.info("text"), the logging
    # package will know exactly which module, class and function the log
    # statement has been sent from, extraordinarily helpful for debugging.
    logger = logging.getLogger(__name__).getChild(__qualname__)

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.

        :rtype: seisflows.config.SeisFlowsPathsParameters
        :return: Paths and parameters that define the given class
        """
        # The super().required argument ensures that the subclass inherits the
        # paths and parameters defined by its parent class
        sf = SeisFlowsPathsParameters(super().required)

        # > Additional or overloading paths and parameters can be set here

        return sf

    def check(self, validate=True):
        """
        Checks parameters and paths. The validate function ensures that all
        required paths and parameters are accounted for, and that all optional
        paths and parameters are set to user-defined or default values.
        """
        if validate:
            self.required.validate()

        # Validation only required by the lowest subclass, which will validate
        # all the paths and parameters from each of its parent classes
        super().check(validate=False)

    def test(self, *args, **kwargs):
        """
        This is an example OVERWRITE of the base_class.test() function.
        If a super() statement is used, all the code within the base class
        will be run.
        """
        # The super() statement calls the code chunk in base_class.test()
        # Here it will be executed before the remainder of sub_class.test()
        # is executed
        super().test()

        # Multiple logging levels determine how verbose the module will be
        self.logger.info("important log statement goes here")
        self.logger.debug("debugging log statement goes here")
        self.logger.warning("warnings can be passed here")
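# The template above leans on SeisFlowsPathsParameters to accumulate paths and
# parameters down an inheritance chain and to validate them once at the lowest
# subclass.  Below is a minimal, simplified sketch of that idea -- NOT the
# actual seisflows3.config implementation -- just to illustrate how par(),
# path() and validate() could interact with super().required in a subclass
# hierarchy.  All names in this sketch are illustrative.
class PathsParametersSketch:
    """Illustrative stand-in for seisflows3.config.SeisFlowsPathsParameters."""

    def __init__(self, base=None):
        # Start from the parent's definitions so a subclass only adds/overrides
        self.parameters = dict(base.parameters) if base else {}
        self.paths = dict(base.paths) if base else {}

    def par(self, name, required, par_type, docstr, default=None):
        """Register a parameter, mirroring the sf.par() calls in the template."""
        self.parameters[name] = dict(required=required, par_type=par_type,
                                     docstr=docstr, default=default)

    def path(self, name, required, docstr, default=None):
        """Register a path, mirroring the sf.path() calls in the template."""
        self.paths[name] = dict(required=required, docstr=docstr,
                                default=default)

    def validate(self, user_parameters=None):
        """Check that everything marked required was provided by the user."""
        user_parameters = user_parameters or {}
        missing = [name for name, attrs in self.parameters.items()
                   if attrs["required"] and name not in user_parameters]
        assert not missing, f"missing required parameters: {missing}"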
def test_required_functions_exist(sfinit, modules):
    """
    Make sure that the named, required functions exist within the class.
    Do not execute, just make sure they're defined, because they will be
    expected by other modules.
    """
    sfinit
    for package, module_list in modules.items():
        for module in module_list:
            loaded_module = config.custom_import(MODULE, module)()
            for func in REQUIRED_FUNCTIONS:
                assert(func in dir(loaded_module)), \
                    f"'{func}' is a required function in module: " \
                    f"{MODULE}.{module}"
def test_required_parameters_exist(sfinit, modules):
    """
    Ensure that the required parameters are set in all the classes/subclasses
    That is, that the parameters defined above in REQUIRED_PARAMETERS have
    been defined by each SYSTEM class
    """
    sfinit
    for package, module_list in modules.items():
        for module in module_list:
            loaded_module = config.custom_import(MODULE, module)()
            sf_pp = loaded_module.required
            # Check that required parameters are set
            for req_par in REQUIRED_PARAMETERS:
                assert(req_par in sf_pp.parameters.keys()), \
                    f"{req_par} is a required parameter for module {MODULE}"
def test_required_functions_exist(sfinit):
    """
    Make sure that the named, required functions exist within the class.
    Do not execute, just make sure they're defined, because they will be
    expected by other modules.
    """
    sfinit
    for name in config.NAMES:
        for package, module_list in return_modules()[name].items():
            for module in module_list:
                loaded_module = config.custom_import(name, module)()
                # Check that required functions are defined
                for func in required_structure[name]["functions"]:
                    assert(func in dir(loaded_module)), \
                        f"{func} is a required function for module: " \
                        f"{name}.{module}"
def test_required_parameters_exist(sfinit):
    """
    Ensure that the required parameters are set in all the classes/subclasses
    That is, that the parameters defined above in REQUIRED_PARAMETERS have
    been defined by each SYSTEM class
    """
    sfinit
    for name in config.NAMES:
        for package, module_list in return_modules()[name].items():
            for module in module_list:
                loaded_module = config.custom_import(name, module)()
                sf_pp = loaded_module.required
                # Check that required parameters are set
                for req_par in required_structure[name]["parameters"]:
                    assert(req_par in sf_pp.parameters.keys()), \
                        f"{req_par} is a required parameter for module: " \
                        f"{name}.{module}"
def test_validate(sfinit, modules):
    """
    Test out path and parameter validation, essentially checking that all the
    paths and parameters are set properly

    .. note::
        This doesn't work because we have required parameters that are not
        set in the default parameter file. We can run configure beforehand
        but does that make sense?

    :return:
    """
    # NOTE: this test is currently disabled by the early return statement
    return
    sfinit
    for package, module_list in modules.items():
        for module in module_list:
            loaded_module = config.custom_import(MODULE, module)()
            loaded_module.required.validate()
def test_custom_import(sfinit):
    """
    Test that importing based on internal modules works for various inputs

    :return:
    """
    sfinit
    with pytest.raises(SystemExit):
        config.custom_import()
    with pytest.raises(SystemExit):
        config.custom_import(name="NOT A VALID NAME")

    module = config.custom_import(name="optimize", module="LBFGS")
    assert(module.__name__ == "LBFGS")
    assert(module.__module__ == "seisflows3.optimize.LBFGS")

    # Check one more to be safe
    module = config.custom_import(name="optimize", module="base")
    assert(module.__name__ == "Base")
    assert(module.__module__ == "seisflows3.optimize.base")
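# The assertions above rely on config.custom_import() turning a (name, module)
# pair such as ("optimize", "LBFGS") into the class defined in that module.
# The sketch below only shows the underlying importlib pattern; the real
# function additionally guesses class names and exits with SystemExit on bad
# input, which is what the pytest.raises checks exercise.  Treat this as an
# illustration, not the seisflows3 implementation.
import importlib


def custom_import_sketch(name, module):
    """Return the class named after `module` from seisflows3.<name>.<module>."""
    loaded = importlib.import_module(f"seisflows3.{name}.{module}")
    # By convention the class shares its module's name, e.g. base -> Base
    classname = module if not module.islower() else module.capitalize()
    return getattr(loaded, classname)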
class LBFGS(custom_import("optimize", "base")): """ The Limited memory BFGS algorithm Calls upon seisflows.plugin.optimize.LBFGS to accomplish LBFGS algorithm Includes optional safeguards: periodic restarting and descent conditions. To conserve memory, most vectors are read from disk rather than passed from a calling routine. L-BFGS Variables: s: memory of model differences y: memory of gradient differences Optimization Variables: m: model f: objective function value g: gradient direction p: search direction Line Search Variables: x: list of step lenths from current line search f: correpsonding list of function values m: number of step lengths in current line search n: number of model updates in optimization problem gtg: dot product of gradient with itself gtp: dot product of gradient and search direction Status codes status > 0 : finished status == 0 : not finished status < 0 : failed """ # Class-specific logger accessed using self.logger logger = logging.getLogger(__name__).getChild(__qualname__) def __init__(self): """ These parameters should not be set by the user. Attributes are initialized as NoneTypes for clarity and docstrings. :type LBFGS: Class :param LBFGS: plugin LBFGS class that controls the machinery of the L-BFGS optimization schema :type LBFGS_iter: int :param LBFGS_iter: an internally used iteration that differs from optimization iter. Keeps track of internal LBFGS memory of previous gradients. If LBFGS is restarted, the LBFGS_iter iteration is reset, but the optization iteration. :type memory_used: int :param memory_used: bookkeeping to see how many previous gradients have been stored to internal memory. Should not exceed PAR.LBFGSMEM :type LBFGS_dir: str :param LBFGS_dir: location to store LBFGS internal memory :type y_file: str :param y_file: path to store memory of the gradient differences i.e., `g_new - g_old` :type s_file: str :param s_file: path to store memory of the model differences i.e., `m_new - m_old` """ super().__init__() self.LBFGS_iter = 0 self.memory_used = 0 self.LBFGS_dir = "LBFGS" self.y_file = os.path.join(self.LBFGS_dir, "Y") self.s_file = os.path.join(self.LBFGS_dir, "S") @property def required(self): """ A hard definition of paths and parameters required by this class, alongside their necessity for the class and their string explanations. """ sf = SeisFlowsPathsParameters(super().required) # Define the Parameters required by this module sf.par("LINESEARCH", required=False, default="Backtrack", par_type=str, docstr="Algorithm to use for line search, see " "seisflows.plugins.line_search for available choices") sf.par("LBFGSMEM", required=False, default=3, par_type=int, docstr="Max number of previous gradients to retain " "in local memory") sf.par("LBFGSMAX", required=False, par_type=int, default="inf", docstr="LBFGS periodic restart interval, between 1 and 'inf'") sf.par("LBFGSTHRESH", required=False, default=0., par_type=float, docstr="LBFGS angle restart threshold") return sf def check(self, validate=True): """ Checks parameters, paths, and dependencies """ super().check(validate=False) if validate: self.required.validate() assert(PAR.LINESEARCH.upper() == "BACKTRACK"), \ "LBFGS requires a Backtracking line search" def setup(self): """ Set up the LBFGS optimization schema """ super().setup() # Create a separate directory for LBFGS matters unix.cd(PATH.OPTIMIZE) unix.mkdir(self.LBFGS_dir) def compute_direction(self): """ Call on the L-BFGS optimization machinery to compute a search direction using internally stored memory of previous gradients. 
The potential outcomes when computing direction with L-BFGS 1. First iteration of L-BFGS optimization, search direction is defined as the inverse gradient 2. L-BFGS internal iteration ticks over the maximum allowable number of iterations, force a restart condition, search direction is the inverse gradient 3. New search direction vector is too far from previous direction, force a restart, search direction is inverse gradient 4. New search direction is acceptably angled from previous, becomes the new search direction """ self.logger.info(f"computing search direction with L-BFGS") self.LBFGS_iter += 1 unix.cd(PATH.OPTIMIZE) # Load the current gradient direction, which is the L-BFGS search # direction if this is the first iteration g = self.load(self.g_new) if self.LBFGS_iter == 1: self.logger.info("first L-BFGS iteration, setting search direction " "as inverse gradient") p_new = -g restarted = 0 # Restart condition or first iteration lead to setting search direction # as the inverse gradient (i.e., default to steepest descent) elif self.LBFGS_iter > PAR.LBFGSMAX: self.logger.info("restarting L-BFGS due to periodic restart " "condition. setting search direction as" "inverse gradient") self.restart() p_new = -g restarted = 1 # Normal LBFGS direction computation else: # Update the search direction, apply the inverse Hessian such that # 'q' becomes the new search direction 'g' self.logger.info("applying inverse Hessian to gradient") s, y = self.update() q = self.apply(g, s, y) # Determine if the new search direction is appropriate by checking # its angle to the previous search direction if self.check_status(g, q): self.logger.info("new L-BFGS search direction found") p_new = -q restarted = 0 else: self.logger.info("new search direction not appropriate, " "defaulting to inverse gradient") self.restart() p_new = -g restarted = 1 # Save values to disk and memory self.save(self.p_new, p_new) self.restarted = restarted def restart(self): """ On top of base restart class, include a restart of the LBFGS internal memory and memmaps """ super().restart() self.logger.info("restarting L-BFGS optimization algorithm by clearing " "internal memory") self.LBFGS_iter = 1 self.memory_used = 0 unix.cd(PATH.OPTIMIZE) s = np.memmap(filename=self.s_file, mode="r+") y = np.memmap(filename=self.y_file, mode="r+") s[:] = 0. y[:] = 0. def update(self): """ Updates L-BFGS algorithm history .. note:: Because models are large, and multiple iterations of models need to be stored in memory, previous models are stored as `memmaps`, which allow for access of small segments of large files on disk, without reading the entire file. Memmaps are array like objects. .. 
note:: Notation for s and y taken from Liu & Nocedal 1989 iterate notation: sk = x_k+1 - x_k and yk = g_k+1 - gk :rtype s: np.memmap :return s: memory of the model differences `m_new - m_old` :rtype y: np.memmap :return y: memory of the gradient differences `g_new - g_old` """ unix.cd(PATH.OPTIMIZE) # Determine the iterates for model m and gradient g s_k = self.load(self.m_new) - self.load(self.m_old) y_k = self.load(self.g_new) - self.load(self.g_old) # Determine the shape of the memory map (length of model, length of mem) m = len(s_k) n = PAR.LBFGSMEM # Initial iteration, need to create the memory map if self.memory_used == 0: s = np.memmap(filename=self.s_file, mode="w+", dtype="float32", shape=(m, n)) y = np.memmap(filename=self.y_file, mode="w+", dtype="float32", shape=(m, n)) # Store the model and gradient differences in memmaps s[:, 0] = s_k y[:, 0] = y_k self.memory_used = 1 # Subsequent iterations will append to memory maps else: s = np.memmap(filename=self.s_file, mode="r+", dtype="float32", shape=(m, n)) y = np.memmap(filename=self.y_file, mode="r+", dtype="float32", shape=(m, n)) # Shift all stored memory by one index to make room for latest mem s[:, 1:] = s[:, :-1] y[:, 1:] = y[:, :-1] # Store the latest model and gradient in first index s[:, 0] = s_k y[:, 0] = y_k # Keep track of the memory used if self.memory_used < PAR.LBFGSMEM: self.memory_used += 1 return s, y def apply(self, q, s=None, y=None): """ Applies L-BFGS inverse Hessian to given vector :type q: np.array :param q: gradient direction to apply L-BFGS to :type s: np.memmap :param s: memory of model differences :param s: memory of model differences :type y: np.memmap :param y: memory of gradient direction differences :rtype r: np.array :return r: new search direction from application of L-BFGS """ unix.cd(PATH.OPTIMIZE) # If no memmaps are given as arguments, instantiate them if s is None or y is None: m = len(q) n = PAR.LBFGSMEM s = np.memmap(filename=self.s_file, mode="w+", dtype="float32", shape=(m, n)) y = np.memmap(filename=self.y_file, mode="w+", dtype="float32", shape=(m, n)) # First matrix product # Recursion step 2 from appendix A of Modrak & Tromp 2016 kk = self.memory_used rh = np.zeros(kk) al = np.zeros(kk) for ii in range(kk): rh[ii] = 1 / np.dot(y[:, ii], s[:, ii]) al[ii] = rh[ii] * np.dot(s[:, ii], q) q = q - al[ii] * y[:, ii] # Apply a preconditioner if available if self.precond: r = self.precond(q) else: r = q # Use scaling M3 proposed by Liu and Nocedal 1989 sty = np.dot(y[:, 0], s[:, 0]) yty = np.dot(y[:, 0], y[:, 0]) r *= sty/yty # Second matrix product # Recursion step 4 from appendix A of Modrak & Tromp 2016 for ii in range(kk - 1, -1, -1): be = rh[ii] * np.dot(y[:, ii], r) r = r + s[:, ii] * (al[ii] - be) return r def check_status(self, g, r): """ Check the status of the apply() function, determine if restart necessary Return of False means restart, return of True means good to go. :type g: np.array :param g: current gradient direction :type r: np.array :param r: new gradient direction :rtype: bool :return: okay status based on status check (False==bad, True==good) """ theta = 180. * np.pi ** -1 * angle(g, r) self.logger.info(f"new search direction: {theta:.2f}{DEG} from current") if not 0. < theta < 90.: self.logger.info("restarting L-BFGS, theta not a descent direction") okay = False elif theta > 90. - PAR.LBFGSTHRESH: self.logger.info("restarting L-BFGS due to practical safeguard") okay = False else: okay = True return okay
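# apply() above is the standard L-BFGS two-loop recursion (appendix A of
# Modrak & Tromp 2016; Liu & Nocedal 1989 scaling), just operating on
# memory-mapped columns.  The self-contained NumPy sketch below (plain arrays,
# no preconditioner, newest pair in column 0 as in the class) is only meant to
# make the algebra easier to follow; variable names are illustrative, not
# class API.
import numpy as np


def lbfgs_two_loop(q, S, Y):
    """
    Apply the L-BFGS inverse Hessian approximation to vector q.

    :param q: gradient-like vector, shape (m,)
    :param S: model differences, shape (m, k), newest pair in column 0
    :param Y: gradient differences, shape (m, k), newest pair in column 0
    """
    k = S.shape[1]
    rho = np.zeros(k)
    alpha = np.zeros(k)
    # First loop: project out the stored curvature pairs
    for i in range(k):
        rho[i] = 1. / np.dot(Y[:, i], S[:, i])
        alpha[i] = rho[i] * np.dot(S[:, i], q)
        q = q - alpha[i] * Y[:, i]
    # Initial Hessian scaling (scaling M3 of Liu & Nocedal 1989)
    r = q * np.dot(Y[:, 0], S[:, 0]) / np.dot(Y[:, 0], Y[:, 0])
    # Second loop: reintroduce the curvature information
    for i in range(k - 1, -1, -1):
        beta = rho[i] * np.dot(Y[:, i], r)
        r = r + S[:, i] * (alpha[i] - beta)
    # The search direction used by compute_direction() would then be -r
    return r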
class Specfem3D(custom_import("solver", "base")): """ Python interface to Specfem3D Cartesian. This subclass inherits functions from seisflows3.solver.Base !!! See base class for method descriptions !!! """ logger = logging.getLogger(__name__).getChild(__qualname__) def __init__(self): """ These parameters should not be set by the user. Attributes are initialized as NoneTypes for clarity and docstrings. :type logger: Logger :param logger: Class-specific logging module, log statements pushed from this logger will be tagged by its specific module/classname """ super().__init__() @property def required(self): """ A hard definition of paths and parameters required by this class, alongside their necessity for the class and their string explanations. """ sf = SeisFlowsPathsParameters(super().required) # Define the Parameters required by this module sf.par("NT", required=True, par_type=float, docstr="Number of time steps set in the SPECFEM Par_file") sf.par("DT", required=True, par_type=float, docstr="Time step or delta set in the SPECFEM Par_file") sf.par("FORMAT", required=True, par_type=float, docstr="Format of synthetic waveforms used during workflow, " "available options: ['ascii', 'su']") sf.par("SOURCE_PREFIX", required=False, default="CMTSOLUTION", par_type=str, docstr="Prefix of SOURCE files in path SPECFEM_DATA. Available " "['CMTSOLUTION', FORCESOLUTION']") return sf def check(self, validate=True): """ Checks parameters and paths """ if validate: self.required.validate() super().check(validate=False) acceptable_formats = ["SU", "ASCII"] if PAR.FORMAT.upper() not in acceptable_formats: raise Exception(f"'FORMAT' must be {acceptable_formats}") def generate_data(self, **model_kwargs): """ Generates data using the True model, exports traces to `traces/obs` :param model_kwargs: keyword arguments to pass to `generate_mesh` """ # Create the mesh self.generate_mesh(**model_kwargs) # Run the Forward simulation unix.cd(self.cwd) setpar(key="SIMULATION_TYPE", val="1", file="DATA/Par_file") setpar(key="SAVE_FORWARD", val=".true.", file="DATA/Par_file") if PAR.ATTENUATION: setpar(key="ATTENUATION", val=".true.", file="DATA/Par_file") else: setpar(key="ATTENUATION", val=".false.", file="DATA/Par_file") call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xspecfem3D") unix.mv(src=glob(os.path.join("OUTPUT_FILES", self.data_wildcard)), dst=os.path.join("traces", "obs")) # Export traces to disk for permanent storage if PAR.SAVETRACES: self.export_traces(os.path.join(PATH.OUTPUT, "traces", "obs")) def generate_mesh(self, model_path, model_name, model_type=None): """ Performs meshing with internal mesher Meshfem3D and database generation :type model_path: str :param model_path: path to the model to be used for mesh generation :type model_name: str :param model_name: name of the model to be used as identification :type model_type: str :param model_type: available model types to be passed to the Specfem3D Par_file. See Specfem3D Par_file for available options. 
""" available_model_types = ["gll"] assert (exists(model_path)), f"model {model_path} does not exist" model_type = model_type or getpar(key="MODEL", file="DATA/Par_file") assert(model_type in available_model_types), \ f"{model_type} not in available types {available_model_types}" unix.cd(self.cwd) # Run mesh generation if model_type == "gll": self.check_mesh_properties(model_path) src = glob(os.path.join(model_path, "*")) dst = self.model_databases unix.cp(src, dst) call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xmeshfem3D") call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xgenerate_databases") # Export the model for future use in the workflow if self.taskid == 0: self.export_model(os.path.join(PATH.OUTPUT, model_name)) def eval_func(self, *args, **kwargs): """ Call eval_func from Base class """ super().eval_func(*args, **kwargs) # Work around SPECFEM3D conflicting name conventions of SU data self.rename_data() def forward(self, path="traces/syn"): """ Calls SPECFEM3D forward solver, exports solver outputs to traces dir :type path: str :param path: path to export traces to after completion of simulation """ # Set parameters and run forward simulation setpar(key="SIMULATION_TYPE", val="1", file="DATA/Par_file") setpar(key="SAVE_FORWARD", val=".true.", file="DATA/Par_file") if PAR.ATTENUATION: setpar(key="ATTENUATION", val=".true.", file="DATA/Par_file") else: setpar(key="ATTENUATION", val=".false`.", file="DATA/Par_file") call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xgenerate_databases") call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xspecfem3D") # Find and move output traces, by default to synthetic traces dir unix.mv(src=glob(os.path.join("OUTPUT_FILES", self.data_wildcard)), dst=path) def adjoint(self): """ Calls SPECFEM3D adjoint solver, creates the `SEM` folder with adjoint traces which is required by the adjoint solver """ setpar(key="SIMULATION_TYPE", val="3", file="DATA/Par_file") setpar(key="SAVE_FORWARD", val=".false.", file="DATA/Par_file") setpar(key="ATTENUATION", val=".false.", file="DATA/Par_file") unix.rm("SEM") unix.ln("traces/adj", "SEM") call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xspecfem3D") def check_solver_parameter_files(self): """ Checks solver parameters """ # Check the number of steps in the SPECFEM2D Par_file nt_str, nt, nt_i = getpar(key="NSTEP", file="DATA/Par_file") if int(nt) != PAR.NT: if self.taskid == 0: print( msg.cli( f"SPECFEM3D {nt_str}=={nt} is not equal " f"SeisFlows3 PAR.NT=={PAR.NT}. Please ensure " f"that these values match in both files.", header="parameter match error", border="=")) sys.exit(-1) dt_str, dt, dt_i = getpar(key="DT", file="DATA/Par_file") if float(dt) != PAR.DT: if self.taskid == 0: print( msg.cli( f"SPECFEM3D {dt_str}=={dt} is not equal " f"SeisFlows3 PAR.DT=={PAR.DT}. Please ensure " f"that these values match in both files.", header="parameter match error", border="=")) sys.exit(-1) # Ensure that NPROC matches the MESH values nproc = self.mesh_properties.nproc if nproc != PAR.NPROC: if self.taskid == 0: print( msg.cli( f"SPECFEM3D mesh NPROC=={nproc} is not equal " f"SeisFlows3 PAR.NPROC=={PAR.NPROC}. " f"Please check that your mesh matches this val.", header="parameter match error", border="=")) sys.exit(-1) if "MULTIPLES" in PAR: raise NotImplementedError def initialize_adjoint_traces(self): """ Setup utility: Creates the "adjoint traces" expected by SPECFEM Note: Adjoint traces are initialized by writing zeros for all channels. 
Channels actually in use during an inversion or migration will be overwritten with nonzero values later on. """ # Initialize adjoint traces as zeroes for all data_filenames # write to `traces/adj` super().initialize_adjoint_traces() # Rename data to work around Specfem naming convetions self.rename_data() # Workaround for Specfem3D's requirement that all components exist, # even ones not in use as adjoint traces if PAR.FORMAT.upper() == "SU": unix.cd(os.path.join(self.cwd, "traces", "adj")) for iproc in range(PAR.NPROC): for channel in ["x", "y", "z"]: dst = f"{iproc:d}_d{channel}_SU.adj" if not exists(dst): src = f"{iproc:d}_d{PAR.COMPONENTS[0]}_SU.adj" unix.cp(src, dst) def rename_data(self): """ Works around conflicting data filename conventions Specfem3D's uses different name conventions for regular traces and 'adjoint' traces """ if PAR.FORMAT.upper() == "SU": files = glob(os.path.join(self.cwd, "traces", "adj", "*SU")) unix.rename(old='_SU', new='_SU.adj', names=files) def write_parameters(self): """ Write a set of parameters !!! This calls on plugins.solver.specfem3d.write_parameters() but that function doesn't exist !!! """ unix.cd(self.cwd) solvertools.write_parameters(vars(PAR)) def write_receivers(self): """ Write a list of receivers into a text file !!! This calls on plugins.solver.specfem3d.write_receivers() but incorrect number of parameters is forwarded !!! """ unix.cd(self.cwd) setpar(key="use_existing_STATIONS", val=".true", file="DATA/Par_file") _, h = preprocess.load("traces/obs") solvertools.write_receivers(h.nr, h.rx, h.rz) def write_sources(self): """ Write sources to text file """ unix.cd(self.cwd) _, h = preprocess.load(dir="traces/obs") solvertools.write_sources(PAR=vars(PAR), h=h) @property def data_wildcard(self): """ Returns a wildcard identifier for synthetic data :rtype: str :return: wildcard identifier for channels """ if PAR.FORMAT.upper() == "SU": return f"*_d?_SU" elif PAR.FORMAT.upper() == "ASCII": return f"*.?X?.sem?" @property def data_filenames(self): """ Returns the filenames of all data, either by the requested components or by all available files in the directory. :rtype: list :return: list of data filenames """ unix.cd(os.path.join(self.cwd, "traces", "obs")) if PAR.COMPONENTS: components = PAR.COMPONENTS if PAR.FORMAT.upper() == "SU": return sorted(glob(f"*_d[{components.lower()}]_SU")) elif PAR.FORMAT.upper() == "ASCII": return sorted(glob(f"*.?X[{components.upper()}].sem?")) else: if PAR.FORMAT.upper() == "SU": return sorted(glob("*_d?_SU")) elif PAR.FORMAT.upper() == "ASCII": return sorted(glob("*.???.sem?")) @property def kernel_databases(self): """ The location of databases for kernel outputs, relative to the current working directory. """ return os.path.join(self.cwd, "OUTPUT_FILES", "DATABASES_MPI") @property def model_databases(self): """ The location of databases for model outputs """ return os.path.join(self.cwd, "OUTPUT_FILES", "DATABASES_MPI") @property def source_prefix(self): """ Specfem3D's preferred source prefix :rtype: str :return: source prefix """ return PAR.SOURCE_PREFIX.upper()
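# Most of the class above works by flipping "KEY = value" entries in the
# SPECFEM DATA/Par_file via getpar()/setpar().  The snippet below is a bare
# sketch of that file-editing pattern -- not the seisflows3 utilities, which
# also return comments and line indices and handle more formats -- to show
# what a call such as setpar(key="SIMULATION_TYPE", val="1",
# file="DATA/Par_file") accomplishes.
def setpar_sketch(key, val, file="DATA/Par_file"):
    """Overwrite the value of `key` in a 'KEY = value' formatted text file."""
    with open(file, "r") as f:
        lines = f.readlines()
    with open(file, "w") as f:
        for line in lines:
            # Only edit non-comment lines whose key matches exactly
            if not line.strip().startswith("#") and \
                    line.split("=")[0].strip() == key:
                line = f"{key} = {val}\n"
            f.write(line)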
class Specfem3DMaui(custom_import("solver", "specfem3d")): """ Python interface to Specfem3D Cartesian. This subclass inherits functions from seisflows3.solver.specfem3d !!! See base class for method descriptions !!! """ @property def required(self): """ A hard definition of paths and parameters required by this class, alongside their necessity for the class and their string explanations. """ sf = SeisFlowsPathsParameters(super().required) return sf def check(self, validate=True): """ Checks parameters and paths """ if validate: self.required.validate() super().check(validate=False) def setup(self, model): """ Overload of solver.base.setup(), should be run as a single instance :type model: str :param model: "init", "true", generates the mesh to be used for workflow "true" used for synthetic-synthetic cases "init" for initial model, default :type model: str :param model: model to setup, either 'true' or 'init' """ # Choice of model will determine which mesh to generate self.generate_mesh(model_path=getattr(PATH, f"MODEL_{model.upper()}"), model_name=f"model_{model.lower()}", model_type="gll") self.distribute_databases() def generate_data(self): """ Overload seisflows.solver.base.generate_data. To be run in parallel Not used if PAR.CASE == "Data" Generates data in the synthetic-synthetic comparison case. Automatically calls generate mesh for the true model, rather than passing them in as kwargs. Also turns on attenuation for the forward model !!! attenuation could be moved into parameters.yaml? !!! """ unix.cd(self.cwd) setpar(key="SIMULATION_TYPE", val="1", file="DATA/Par_file") setpar(key="SAVE_FORWARD", val=".true.", file="DATA/Par_file") if PAR.ATTENUATION: setpar(key="ATTENUATION ", val=".true.", file="DATA/Par_file") else: setpar(key="ATTENUATION ", val=".false.", file="DATA/Par_file") call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xspecfem3D") # move ASCII .sem? files into appropriate directory unix.mv(src=glob(os.path.join("OUTPUT_FILES", self.data_wildcard)), dst=os.path.join("traces", "obs")) # Export traces to permanent storage on disk if PAR.SAVETRACES: self.export_traces(os.path.join(PATH.OUTPUT, "traces", "obs")) def generate_mesh(self, model_path, model_name, model_type='gll'): """ Performs meshing and database generation as a serial task. Differs slightly from specfem3d class as it only creates database files for the main solver, which are then copied in serial by the function distribute_databases() :type model_path: str :param model_path: path to the model to be used for mesh generation :type model_name: str :param model_name: name of the model to be used as identification :type model_type: str :param model_type: available model types to be passed to the Specfem3D Par_file. See Specfem3D Par_file for available options. 
""" available_model_types = ["gll"] assert (exists(model_path)), f"model {model_path} does not exist" model_type = model_type or getpar(key="MODEL", file="DATA/Par_file") assert(model_type in available_model_types), \ f"{model_type} not in available types {available_model_types}" # Ensure that we're running on the main solver only assert (self.taskid == 0) unix.cd(self.cwd) # Check that the model parameter falls into the acceptable types par = getpar("MODEL").strip() assert(par in available_model_types), \ f"Par_file {par} not in available types {available_model_types}" if par == "gll": self.check_mesh_properties(model_path) # Copy model files and then run xgenerate databases src = glob(os.path.join(model_path, "*")) dst = self.model_databases unix.cp(src, dst) call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xgenerate_databases") self.export_model(os.path.join(PATH.OUTPUT, model_name)) def eval_misfit(self, path='', export_traces=False): """ Performs function evaluation only, that is, the misfit quantification. Forward simulations are performed in a separate function :type path: str :param path: path in the scratch directory to use for I/O :type export_traces: bool :param export_traces: option to save the observation traces to disk :return: """ preprocess.prepare_eval_grad(cwd=self.cwd, taskid=self.taskid, source_name=self.source_name, filenames=self.data_filenames) if export_traces: self.export_residuals(path) def eval_fwd(self, path=''): """ High level solver interface Performans forward simulations only, function evaluation is split off into its own function :type path: str :param path: path in the scratch directory to use for I/O """ unix.cd(self.cwd) self.import_model(path) self.forward() def distribute_databases(self): """ A serial task to distrubute the database files outputted by xgenerate_databases from main solver to all other solver directories """ # Copy the database files but ignore any vt? files src_db = glob( os.path.join(PATH.SOLVER, self.mainsolver, "OUTPUT_FILES", "DATABASES_MPI", "*")) for extension in [".vtu", ".vtk"]: src_db = [_ for _ in src_db if extension not in _] # Copy the .h files from the mesher, Specfem needs these as well src_h = glob( os.path.join(PATH.SOLVER, self.mainsolver, "OUTPUT_FILES", "*.h")) for source_name in self.source_names: # Ensure main solver is skipped if source_name == self.mainsolver: continue # Copy database files to each of the other source directories dst_db = os.path.join(PATH.SOLVER, source_name, "OUTPUT_FILES", "DATABASES_MPI", "") unix.cp(src_db, dst_db) # Copy mesher h files into the overlying directory dst_h = os.path.join(PATH.SOLVER, source_name, "OUTPUT_FILES", "") unix.cp(src_h, dst_h) def initialize_solver_directories(self): """ Creates solver directories in serial using a single node. Should only be run by master job. Differs from Base initialize_solver_directories() as this serial task will create directory structures for each source, rather than having each source create its own. However the internal dir structure is the same. 
""" for source_name in self.source_names: cwd = os.path.join(PATH.SOLVER, source_name) # Remove any existing scratch directory unix.rm(cwd) # Create internal directory structure, change into directory to make # all actions RELATIVE path actions unix.mkdir(cwd) unix.cd(cwd) for cwd_dir in [ "bin", "DATA", "OUTPUT_FILES/DATABASES_MPI", "traces/obs", "traces/syn", "traces/adj" ]: unix.mkdir(cwd_dir) # Copy exectuables src = glob(os.path.join(PATH.SPECFEM_BIN, "*")) dst = os.path.join("bin", "") unix.cp(src, dst) # Copy all input files except source files src = glob(os.path.join(PATH.SPECFEM_DATA, "*")) src = [_ for _ in src if self.source_prefix not in _] dst = os.path.join("DATA", "") unix.cp(src, dst) # symlink event source specifically src = os.path.join(PATH.SPECFEM_DATA, f"{self.source_prefix}_{source_name}") dst = os.path.join("DATA", self.source_prefix) unix.ln(src, dst) if source_name == self.mainsolver: # Symlink taskid_0 as mainsolver in solver directory unix.ln(source_name, os.path.join(PATH.SOLVER, "mainsolver")) # Only check the solver parameters once self.check_solver_parameter_files() def check_solver_parameter_files(self): """ Checks solver parameters. Only slightly different to Specfem3D as it is run by the main task, not be an array process, so no need to check task_id """ nt = getpar(key="NSTEP", cast=int) dt = getpar(key="DT", cast=float) if nt != PAR.NT: warnings.warn("Specfem3D NSTEP != PAR.NT\n" "overwriting Specfem3D with Seisflows parameter") setpar(key="NSTEP", val=PAR.NT) if dt != PAR.DT: warnings.warn("Specfem3D DT != PAR.DT\n" "overwriting Specfem3D with Seisflows parameter") setpar(key="DT", val=PAR.DT) if self.mesh_properties.nproc != PAR.NPROC: warnings.warn("Specfem3D mesh nproc != PAR.NPROC") if "MULTIPLES" in PAR: raise NotImplementedError @property def mainsolver(self): """ Ensure that the main solver has a consistent reference inside Solver """ return self.source_names[0]
class Specfem2D(custom_import("solver", "base")): """ Python interface to Specfem2D. This subclass inherits functions from seisflows.solver.Base !!! See base class for method descriptions !!! """ # Class-specific logger accessed using self.logger logger = logging.getLogger(__name__).getChild(__qualname__) def __init__(self): """ These parameters should not be set by the user. Attributes are initialized as NoneTypes for clarity and docstrings. :type logger: Logger :param logger: Class-specific logging module, log statements pushed from this logger will be tagged by its specific module/classname """ super().__init__() @property def required(self): """ A hard definition of paths and parameters required by this class, alongside their necessity for the class and their string explanations. """ sf = SeisFlowsPathsParameters(super().required) # Define the Parameters required by this module sf.par("NT", required=True, par_type=float, docstr="Number of time steps set in the SPECFEM Par_file") sf.par("DT", required=True, par_type=float, docstr="Time step or delta set in the SPECFEM Par_file") sf.par("F0", required=True, par_type=float, docstr="Dominant source frequency") sf.par("FORMAT", required=True, par_type=float, docstr="Format of synthetic waveforms used during workflow, " "available options: ['ascii', 'su']") sf.par("SOURCE_PREFIX", required=False, default="SOURCE", par_type=str, docstr="Prefix of SOURCE files in path SPECFEM_DATA. By " "default, 'SOURCE' for SPECFEM2D") return sf def check(self, validate=True): """ Checks parameters and paths """ if validate: self.required.validate() super().check(validate=False) acceptable_formats = ["SU", "ASCII"] assert(PAR.FORMAT.upper() in acceptable_formats), \ f"FORMAT must be {acceptable_formats}" def check_solver_parameter_files(self): """ Checks SPECFEM2D Par_file for acceptable parameters and matches with the internally set parameters """ # Check the number of steps in the SPECFEM2D Par_file nt_str, nt, nt_i = getpar(key="NSTEP", file="DATA/Par_file") if int(nt) != PAR.NT: if self.taskid == 0: print( msg.cli( f"SPECFEM2D {nt_str}=={nt} is not equal " f"SeisFlows3 PAR.NT=={PAR.NT}. Please ensure " f"that these values match in both files.", header="parameter match error", border="=")) sys.exit(-1) dt_str, dt, dt_i = getpar(key="DT", file="DATA/Par_file") if float(dt) != PAR.DT: if self.taskid == 0: print( msg.cli( f"SPECFEM2D {dt_str}=={dt} is not equal " f"SeisFlows3 PAR.DT=={PAR.DT}. Please ensure " f"that these values match in both files.", header="parameter match error", border="=")) sys.exit(-1) # Check the central frequency in the SPECFEM2D SOURCE file f0_str, f0, f0_i = getpar(key="f0", file="DATA/SOURCE") if float(f0) != PAR.F0: if self.taskid == 0: print( msg.cli( f"SPECFEM2D {f0_str}=={f0} is not equal " f"SeisFlows3 PAR.F0=={PAR.F0}. Please ensure " f"that these values match the DATA/SOURCE file.", header="parameter match error", border="=")) sys.exit(-1) # Ensure that NPROC matches the MESH values nproc = self.mesh_properties.nproc if nproc != PAR.NPROC: if self.taskid == 0: print( msg.cli( f"SPECFEM2D mesh NPROC=={nproc} is not equal" f"SeisFlows3 PAR.NPROC=={PAR.NPROC}. 
" f"Please check that your mesh matches this val.", header="parameter match error", border="=")) sys.exit(-1) if "MULTIPLES" in PAR: if PAR.MULTIPLES: setpar(key="absorbtop", val=".false.", file="DATA/Par_file") else: setpar(key="absorbtop", val=".true.", file="DATA/Par_file") def generate_data(self, **model_kwargs): """ Generates data using the True model, exports traces to `traces/obs` :param model_kwargs: keyword arguments to pass to `generate_mesh` """ self.generate_mesh(**model_kwargs) unix.cd(self.cwd) setpar(key="SIMULATION_TYPE", val="1", file="DATA/Par_file") setpar(key="SAVE_FORWARD", val=".true.", file="DATA/Par_file") call_solver(PAR.MPIEXEC, "bin/xmeshfem2D", output="mesher.log") call_solver(PAR.MPIEXEC, "bin/xspecfem2D", output="solver.log") if PAR.FORMAT.upper() == "SU": # Work around SPECFEM2D's version dependent file names for tag in ["d", "v", "a", "p"]: unix.rename(old=f"single_{tag}.su", new="single.su", names=glob(os.path.join("OUTPUT_FILES", "*.su"))) unix.mv(src=glob(os.path.join("OUTPUT_FILES", self.data_wildcard)), dst=os.path.join("traces", "obs")) if PAR.SAVETRACES: self.export_traces(os.path.join(PATH.OUTPUT, "traces", "obs")) def initialize_adjoint_traces(self): """ Setup utility: Creates the "adjoint traces" expected by SPECFEM. This is only done for the 'base' the Preprocess class. Note: Adjoint traces are initialized by writing zeros for all channels. Channels actually in use during an inversion or migration will be overwritten with nonzero values later on. """ super().initialize_adjoint_traces() unix.cd(self.cwd) unix.cd(os.path.join("traces", "adj")) # work around SPECFEM2D's use of different name conventions for # regular traces and 'adjoint' traces if PAR.FORMAT.upper() == "SU": files = glob("*SU") unix.rename(old="_SU", new="_SU.adj", names=files) elif PAR.FORMAT.upper() == "ASCII": files = glob("*sem?") # Get the available extensions, which are named based on unit extensions = set([os.path.splitext(_)[-1] for _ in files]) for extension in extensions: unix.rename(old=extension, new=".adj", names=files) # SPECFEM2D requires that all components exist even if ununsed components = ["x", "y", "z", "p"] if PAR.FORMAT.upper() == "SU": for comp in components: src = f"U{PAR.COMPONENTS[0]}_file_single.su.adj" dst = f"U{comp.lower()}s_file_single.su.adj" if not exists(dst): unix.cp(src, dst) elif PAR.FORMAT.upper() == "ASCII": for fid in glob("*.adj"): net, sta, cha, ext = fid.split(".") for comp in components: # Replace the last value in the channel with new component cha_check = cha[:-1] + comp.upper() fid_check = ".".join([net, sta, cha_check, ext]) if not exists(fid_check): unix.cp(fid, fid_check) def generate_mesh(self, model_path, model_name, model_type='gll'): """ Performs meshing with internal mesher Meshfem2D and database generation :type model_path: str :param model_path: path to the model to be used for mesh generation :type model_name: str :param model_name: name of the model to be used as identification :type model_type: str :param model_type: available model types to be passed to the Specfem3D Par_file. See Specfem3D Par_file for available options. """ assert (exists(model_path)), f"model {model_path} does not exist" available_model_types = ["gll"] assert(model_type in available_model_types), \ f"{model_type} not in available types {available_model_types}" unix.cd(self.cwd) # Run mesh generation if model_type == "gll": self.check_mesh_properties(model_path) # Copy the model files (ex: proc000023_vp.bin ...) 
            # into DATA
            src = glob(os.path.join(model_path, "*"))
            dst = self.model_databases
            unix.cp(src, dst)

        # Export the model into output folder
        if self.taskid == 0:
            self.export_model(os.path.join(PATH.OUTPUT, model_name))

    def forward(self, path='traces/syn'):
        """
        Calls SPECFEM2D forward solver, exports solver outputs to traces dir

        :type path: str
        :param path: path to export traces to after completion of simulation
        """
        setpar(key="SIMULATION_TYPE", val="1", file="DATA/Par_file")
        setpar(key="SAVE_FORWARD", val=".true.", file="DATA/Par_file")

        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xmeshfem2D")
        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xspecfem2D")

        if PAR.FORMAT.upper() == "SU":
            # Work around SPECFEM2D's version dependent file names
            for tag in ["d", "v", "a", "p"]:
                unix.rename(old=f"single_{tag}.su", new="single.su",
                            names=glob(os.path.join("OUTPUT_FILES", "*.su")))

        unix.mv(src=glob(os.path.join("OUTPUT_FILES", self.data_wildcard)),
                dst=path)

    def adjoint(self):
        """
        Calls SPECFEM2D adjoint solver, creates the `SEM` folder with adjoint
        traces which is required by the adjoint solver
        """
        setpar(key="SIMULATION_TYPE", val="3", file="DATA/Par_file")
        setpar(key="SAVE_FORWARD", val=".false.", file="DATA/Par_file")

        unix.rm("SEM")
        unix.ln("traces/adj", "SEM")

        # Deal with different SPECFEM2D name conventions for regular traces
        # and "adjoint" traces
        if PAR.FORMAT.upper() == "SU":
            unix.rename(old=".su", new=".su.adj",
                        names=glob(os.path.join("traces", "adj", "*.su")))

        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xmeshfem2D")
        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xspecfem2D")

    def smooth(self, input_path, **kwargs):
        """
        Specfem2D requires additional model parameters in directory to perform
        the xsmooth_sem task. This function will copy these files into the
        directory before performing the base smooth operations.

        Kwargs should match arguments of solver.base.smooth()

        .. note::
            This operation is usually run with run(single=True) so only one
            task will be performing these operations.

        :type input_path: str
        :param input_path: path to data
        """
        # Redundant to 'base' class but necessary
        if not exists(input_path):
            unix.mkdir(input_path)

        unix.cd(self.cwd)
        unix.cd("DATA")

        # Copy over only the files that are required. Won't execute if no match
        files = []
        for tag in ["jacobian", "NSPEC_ibool", "x", "y", "z"]:
            files += glob(f"*_{tag}.bin")
        for src in files:
            unix.cp(src=src, dst=input_path)

        super().smooth(input_path=input_path, **kwargs)

    def import_model(self, path):
        """
        File transfer utility to move a SPECFEM2D model into the correct
        location for a workflow.

        :type path: str
        :param path: path to the SPECFEM2D model
        :return:
        """
        unix.cp(src=glob(os.path.join(path, "model", "*")),
                dst=os.path.join(self.cwd, "DATA"))

    def export_model(self, path):
        """
        File transfer utility to move a SPECFEM2D model from the DATA
        directory to an external path location

        :type path: str
        :param path: path to export the SPECFEM2D model
        :return:
        """
        unix.mkdir(path)
        unix.cp(src=glob(os.path.join(self.cwd, "DATA", "*.bin")), dst=path)

    @property
    def data_filenames(self):
        """
        Returns the filenames of all data, either by the requested components
        or by all available files in the directory.

        ..
note:: If the glob returns an empty list, this function exits the workflow because filenames should not be empty is they're being queried :rtype: list :return: list of data filenames """ unix.cd(self.cwd) unix.cd(os.path.join("traces", "obs")) if PAR.COMPONENTS: filenames = [] if PAR.FORMAT.upper() == "SU": for comp in PAR.COMPONENTS: filenames += [self.data_wildcard.format(comp=comp.lower())] # filenames += [f"U{comp.lower()}_file_single.su"] elif PAR.FORMAT.upper() == "ASCII": for comp in PAR.COMPONENTS: filenames += glob( self.data_wildcard.format(comp=comp.upper())) # filenames += glob(f"*.?X{comp.upper()}.sem?") else: filenames = glob(self.data_wildcard) if not filenames: print( msg.cli( "The property solver.data_filenames, used to search " "for traces in 'scratch/solver/*/traces' is empty " "and should not be. Please check solver parameters: ", items=[f"data_wildcard: {self.data_wildcard}"], header="data filenames error", border="=")) sys.exit(-1) return filenames @property def model_databases(self): """ The location of model inputs and outputs as defined by SPECFEM2D """ return os.path.join(self.cwd, "DATA") @property def kernel_databases(self): """ The location of kernel inputs and outputs as defined by SPECFEM2D """ return os.path.join(self.cwd, "OUTPUT_FILES") @property def data_wildcard(self, comp="?"): """ Returns a wildcard identifier for synthetic data based on SPECFEM2D file naming schema. Allows formatting dcomponent e.g., when called by solver.data_filenames :type comp: str :param comp: component formatter, defaults to wildcard '?' :rtype: str :return: wildcard identifier for channels """ if PAR.FORMAT.upper() == "SU": # return f"*.su" # too vague but maybe for a reason? -bryant return f"U{comp}_file_single.su" elif PAR.FORMAT.upper() == "ASCII": return f"*.?X{comp}.sem?" @property def source_prefix(self): """ Specfem2D's preferred source prefix :rtype: str :return: source prefix """ return PAR.SOURCE_PREFIX.upper()
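# data_wildcard() and data_filenames() above hide SPECFEM2D's format-dependent
# trace naming.  A quick illustration of the glob patterns involved (the
# example filenames are hypothetical, not tied to any particular run):
#
#   FORMAT == "SU",    component "y" -> "Uy_file_single.su"
#   FORMAT == "ASCII", component "Z" -> "*.?XZ.sem?" (e.g. "AA.S0001.BXZ.semd")
from glob import glob


def synthetic_trace_files(fmt, components="ZX"):
    """Collect SPECFEM2D trace filenames per component for a given format."""
    filenames = []
    for comp in components:
        if fmt.upper() == "SU":
            filenames += glob(f"U{comp.lower()}_file_single.su")
        elif fmt.upper() == "ASCII":
            filenames += glob(f"*.?X{comp.upper()}.sem?")
    return sorted(filenames)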
class Inversion(custom_import("workflow", "base")): """ Waveform inversion base class Peforms iterative nonlinear inversion and provides a base class on top of which specialized strategies can be implemented. To allow customization, the inversion workflow is divided into generic methods such as "initialize", "finalize", "evaluate_function", "evaluate_gradient", which can be easily overloaded. Calls to forward and adjoint solvers are abstracted through the "solver" interface so that various forward modeling packages canf be used interchangeably. Commands for running in serial or parallel on a workstation or cluster are abstracted through the "system" interface. """ # Class-specific logger accessed using self.logger logger = logging.getLogger(__name__).getChild(__qualname__) def __init__(self): """ These parameters should not be set by the user. Attributes are initialized as NoneTypes for clarity and docstrings. """ super().__init__() @property def required(self): """ A hard definition of paths and parameters required by this class, alongside their necessity for the class and their string explanations. """ sf = SeisFlowsPathsParameters(super().required) # Define the Parameters required by this module sf.par("BEGIN", required=False, default=1, par_type=int, docstr="First iteration of workflow, 1 <= BEGIN <= inf") sf.par("END", required=True, par_type=int, docstr="Last iteration of workflow, BEGIN <= END <= inf") # Define the Paths required by this module sf.path("FUNC", required=False, default=os.path.join(PATH.SCRATCH, "evalfunc"), docstr="scratch path to store data related to function " "evaluations") sf.path("GRAD", required=False, default=os.path.join(PATH.SCRATCH, "evalgrad"), docstr="scratch path to store data related to gradient " "evaluations") sf.path("HESS", required=False, default=os.path.join(PATH.SCRATCH, "evalhess"), docstr="scratch path to store data related to Hessian " "evaluations") sf.path("OPTIMIZE", required=False, default=os.path.join(PATH.SCRATCH, "optimize"), docstr="scratch path to store data related to nonlinear " "optimization") return sf def check(self, validate=True): """ Checks parameters and paths """ super().check(validate=False) if validate: self.required.validate() for required_path in ["SCRATCH", "OUTPUT", "LOCAL"]: assert(required_path in PATH), \ f"Inversion requires path {required_path}" assert(1 <= PAR.BEGIN <= PAR.END), \ f"Incorrect BEGIN or END parameter: 1 <= {PAR.BEGIN} <= {PAR.END}" def main(self, return_flow=False): """ This function controls the main SeisFlows3 workflow, and is submitted to system by the call `seisflows submit` or `seisflows resume`. It proceeds to evaluate a list of functions in order until a User defined stop criteria is met. :type return_flow: bool :param return_flow: for CLI tool, simply returns the flow function rather than running the workflow. Used for print statements etc. 
""" # The workFLOW is a tuple of functions that can be called dynamic ally flow = (self.setup, self.initialize, self.evaluate_gradient, self.write_gradient, self.compute_direction, self.line_search, self.finalize, self.clean) if return_flow: return flow # Allow workflow resume from and stop after given flow functions start, stop = self.check_stop_resume_cond(flow) # Run the workflow until from the current iteration until PAR.END optimize.iter = PAR.BEGIN self.logger.info(msg.mjr("STARTING INVERSION WORKFLOW")) while True: self.logger.info(msg.mnr(f"ITERATION {optimize.iter} / {PAR.END}")) # Execute the functions within the flow for func in flow[start:stop]: func() # Finish. Assuming completion of all arguments in flow() self.logger.info(msg.mjr(f"FINISHED FLOW EXECUTION")) # Reset flow for subsequent iterations start, stop = None, None if optimize.iter >= PAR.END: break optimize.iter += 1 self.logger.info( msg.sub(f"INCREMENT ITERATION TO {optimize.iter}")) self.logger.info(msg.mjr("FINISHED INVERSION WORKFLOW")) def setup(self): """ Lays groundwork for inversion by running setup() functions for the involved sub-modules, generating True model synthetic data if necessary, and generating the pre-requisite database files. .. note:: This function should only be run one time, at the start of iter 1 """ # Iter check is done inside setup() so that we can include fx in FLOW if optimize.iter == 1: # Set up all the requisite modules from the master job self.logger.info(msg.mnr("PERFORMING MODULE SETUP")) preprocess.setup() postprocess.setup() optimize.setup() # Run solver.setup() in parallel self.logger.info("setting up solver on system...") system.run("solver", "setup") def initialize(self): """ Generates synthetics via a forward simulation, calculates misfits for the forward simulation. Writes misfit for use in optimization. """ self.logger.info(msg.mjr("INITIALIZING INVERSION")) self.evaluate_function(path=PATH.GRAD, suffix="new") def compute_direction(self): """ Computes search direction """ self.logger.info(msg.mnr("COMPUTING SEARCH DIRECTION")) optimize.compute_direction() def line_search(self): """ Conducts line search in given search direction Status codes: status > 0 : finished status == 0 : not finished status < 0 : failed """ # Calculate the initial step length based on optimization algorithm if optimize.line_search.step_count == 0: self.logger.info( msg.mjr(f"CONDUCTING LINE SEARCH " f"({optimize.eval_str})")) optimize.initialize_search() # Attempt a new trial step with the given step length optimize.line_search.step_count += 1 self.logger.info(msg.mnr(f"TRIAL STEP COUNT: {optimize.eval_str}")) self.evaluate_function(path=PATH.FUNC, suffix="try") # Check the function evaluation against line search history status = optimize.update_search() # Proceed based on the outcome of the line search if status > 0: self.logger.info("trial step successful") # Save outcome of line search to disk; reset step to 0 for next iter optimize.finalize_search() return elif status == 0: self.logger.info("retrying with new trial step") # Recursively call this function to attempt another trial step self.line_search() elif status < 0: if optimize.retry_status(): self.logger.info("line search failed. restarting line search") # Reset the line search machinery; set step count to 0 optimize.restart() self.line_search() else: self.logger.info("line search failed. 
aborting inversion.") sys.exit(-1) def evaluate_function(self, path, suffix): """ Performs forward simulation, and evaluates the objective function :type path: str :param path: path in the scratch directory to use for I/O :type suffix: str :param suffix: suffix to use for I/O """ self.logger.info(msg.sub("EVALUATE OBJECTIVE FUNCTION")) # Ensure that we are referencing the same tags as defined in OPTIMIZE model_tag = getattr(optimize, f"m_{suffix}") misfit_tag = getattr(optimize, f"f_{suffix}") self.write_model(path=path, tag=model_tag) self.logger.debug(f"evaluating objective function {PAR.NTASK} times " f"on system...") system.run("solver", "eval_func", path=path) self.write_misfit(path=path, tag=misfit_tag) def evaluate_gradient(self, path=None): """ Performs adjoint simulation to retrieve the gradient of the objective """ self.logger.info(msg.mnr("EVALUATING GRADIENT")) self.logger.debug( f"evaluating gradient {PAR.NTASK} times on system...") system.run("solver", "eval_grad", path=path or PATH.GRAD, export_traces=PAR.SAVETRACES) def finalize(self): """ Saves results from current model update iteration and increment the iteration number to set up for the next iteration. Finalization is expected to the be LAST function in workflow.main()'s flow list. """ self.logger.info(msg.mjr(f"FINALIZING ITERATION {optimize.iter}")) self.checkpoint() preprocess.finalize() # Save files from scratch before discarding if PAR.SAVEMODEL: self.save_model() if PAR.SAVEGRADIENT: self.save_gradient() if PAR.SAVEKERNELS: self.save_kernels() if PAR.SAVETRACES: self.save_traces() if PAR.SAVERESIDUALS: self.save_residuals() def clean(self): """ Cleans directories in which function and gradient evaluations were carried out """ self.logger.info(msg.mnr("CLEANING WORKDIR FOR NEXT ITERATION")) unix.rm(PATH.GRAD) unix.rm(PATH.FUNC) unix.mkdir(PATH.GRAD) unix.mkdir(PATH.FUNC) def checkpoint(self): """ Writes information to disk so workflow can be resumed following a break """ save() def write_model(self, path, tag): """ Writes model in format expected by solver :type path: str :param path: path to write the model to :type src: str :param src: name of the model to be saved, usually tagged as 'm' with a suffix depending on where in the inversion we are. e.g., 'm_try'. Expected that these tags are defined in OPTIMIZE module """ src = tag dst = os.path.join(path, "model") self.logger.debug(f"saving model '{src}' to:\n{dst}") solver.save(solver.split(optimize.load(src)), dst) def write_gradient(self): """ Writes gradient in format expected by non-linear optimization library. Calls the postprocess module, which will smooth/precondition gradient. """ self.logger.info(msg.mnr("POSTPROCESSING KERNELS")) src = os.path.join(PATH.GRAD, "gradient") dst = f"g_new" postprocess.write_gradient(PATH.GRAD) parts = solver.load(src, suffix="_kernel") optimize.save(dst, solver.merge(parts)) def write_misfit(self, path, tag): """ Writes misfit in format expected by nonlinear optimization library. Collects all misfit values within the given residuals directory and sums them in a manner chosen by the preprocess class. :type path: str :param path: path to write the misfit to :type tag: str :param tag: name of the model to be saved, usually tagged as 'f' with a suffix depending on where in the inversion we are. e.g., 'f_try'. 
Expected that these tags are defined in OPTIMIZE module """ self.logger.info("summing residuals with preprocess module") src = glob(os.path.join(path, "residuals", "*")) dst = tag total_misfit = preprocess.sum_residuals(src) self.logger.debug(f"saving misfit {total_misfit:.3E} to tag '{dst}'") optimize.savetxt(dst, total_misfit) def save_gradient(self): """ Save the gradient vector. Allows saving numpy array or standard Fortran .bin files Saving as a vector saves on file count, but requires numpy and seisflows functions to read """ dst = os.path.join(PATH.OUTPUT, f"gradient_{optimize.iter:04d}") if PAR.SAVEAS in ["binary", "both"]: src = os.path.join(PATH.GRAD, "gradient") unix.mv(src, dst) if PAR.SAVEAS in ["vector", "both"]: src = os.path.join(PATH.OPTIMIZE, optimize.g_old) unix.cp(src, dst + ".npy") self.logger.debug(f"saving gradient to path:\n{dst}") def save_model(self): """ Save the model vector. Allows saving numpy array or standard Fortran .bin files Saving as a vector saves on file count, but requires numpy and seisflows functions to read """ src = optimize.m_new dst = os.path.join(PATH.OUTPUT, f"model_{optimize.iter:04d}") self.logger.debug(f"saving model '{src}' to path:\n{dst}") if PAR.SAVEAS in ["binary", "both"]: solver.save(solver.split(optimize.load(src)), dst) if PAR.SAVEAS in ["vector", "both"]: np.save(file=dst, arr=optimize.load(src)) def save_kernels(self): """ Save the kernel vector as a Fortran binary file on disk """ src = os.path.join(PATH.GRAD, "kernels") dst = os.path.join(PATH.OUTPUT, f"kernels_{optimize.iter:04d}") self.logger.debug(f"saving kernels to path:\n{dst}") unix.mv(src, dst) def save_traces(self): """ Save the waveform traces to disk. !!! This doesn't work? Traces are not saved to PATH.GRAD so src does !!! not exist """ src = os.path.join(PATH.GRAD, "traces") dst = os.path.join(PATH.OUTPUT, f"traces_{optimize.iter:04d}") self.logger.debug(f"saving traces to path:\n{dst}") unix.mv(src, dst) def save_residuals(self): """ Save the residuals to disk """ src = os.path.join(PATH.GRAD, "residuals") dst = os.path.join(PATH.OUTPUT, f"residuals_{optimize.iter:04d}") self.logger.debug(f"saving residuals to path:\n{dst}") unix.mv(src, dst)
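# main() above drives the inversion by slicing a tuple of functions with
# (start, stop) indices so that a workflow can resume from, or stop after, any
# named step.  A stripped-down sketch of that control pattern is shown below;
# the function and argument names are placeholders, not the workflow API.
def run_flow(flow, resume_from=None, stop_after=None):
    """Execute a tuple of no-argument functions, honoring resume/stop names."""
    names = [fn.__name__ for fn in flow]
    start = names.index(resume_from) if resume_from else 0
    stop = names.index(stop_after) + 1 if stop_after else len(flow)
    for func in flow[start:stop]:
        func()

# e.g. run_flow((setup, initialize, evaluate_gradient), resume_from="initialize")
# where setup/initialize/evaluate_gradient stand in for the bound methods above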
class Cluster(custom_import("system", "base")): """ Abstract base class for the Systems module which controls interaction with compute systems such as HPC clusters. """ # Class-specific logger accessed using self.logger logger = logging.getLogger(__name__).getChild(__qualname__) @property def required(self): """ A hard definition of paths and parameters required by this class, alongside their necessity for the class and their string explanations. """ sf = SeisFlowsPathsParameters(super().required) # Define the Parameters required by this module sf.par("WALLTIME", required=True, par_type=float, docstr="Maximum job time in minutes for main SeisFlows3 job") sf.par("TASKTIME", required=True, par_type=float, docstr="Maximum job time in minutes for each SeisFlows3 task") sf.par("NTASK", required=True, par_type=int, docstr="Number of separate, individual tasks. Also equal to " "the number of desired sources in workflow") sf.par("NPROC", required=True, par_type=int, docstr="Number of processor to use for each simulation") sf.par("ENVIRONS", required=False, default="", par_type=str, docstr="Optional environment variables to be provided in the" "following format VAR1=var1,VAR2=var2... Will be set" "using os.environs") return sf def check(self, validate=True): """ Checks parameters and paths """ if validate: self.required.validate() super().check(validate=False) def submit(self, submit_call): """ Main insertion point of SeisFlows3 onto the compute system. .. rubric:: $ seisflows submit .. note:: The expected behavior of the submit() function is to: 1) run system setup, creating directory structure, 2) execute workflow by submitting workflow.main() :type workflow: seisflows3.workflow :param workflow: an active seisflows3 workflow instance :type submit_call: str :param submit_call: the command line workload manager call to be run by subprocess. These need to be passed in by specific workload manager subclasses. """ self.setup() workflow = sys.modules["seisflows_workflow"] workflow.checkpoint() # check==True: subprocess will wait for workflow.main() to finish subprocess.run(submit_call, shell=True, check=True) def run(self, classname, method, **kwargs): """ Runs a task multiple times in parallel .. note:: The expected behavior of the run() function is to: submit N jobs to the system in parallel. For example, in a simulation step, run() submits N jobs to the compute system where N is the number of events requiring an adjoint simulation. :rtype: None :return: This function is not expected to return anything """ raise NotImplementedError('Must be implemented by subclass.') def taskid(self): """ Provides a unique identifier for each running task. This is compute system specific. :rtype: int :return: this function is expected to return a unique numerical identifier. """ raise NotImplementedError('Must be implemented by subclass.')
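# The ENVIRONS parameter above is documented as "VAR1=var1,VAR2=var2...",
# to be set with os.environ. A minimal sketch of how such a string could be
# expanded; the helper name `set_environs` is illustrative, not part of the
# class above:
import os

def set_environs(environs):
    """Export comma-separated KEY=val pairs as environment variables."""
    for pair in environs.split(","):
        if not pair.strip():
            continue
        key, val = pair.split("=", maxsplit=1)
        os.environ[key.strip()] = val.strip()

set_environs("OMP_NUM_THREADS=1,MY_FLAG=true")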
class Slurm(custom_import("system", "cluster")):
    """
    Generalized interface for submitting jobs to and interfacing with a SLURM
    workload management system.
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized as NoneTypes for clarity and docstrings.
        """
        super().__init__()

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        sf.par("MPIEXEC", required=False, default="srun -u", par_type=str,
               docstr="Function used to invoke executables on the system. "
                      "For example 'srun' on SLURM systems, or './' on a "
                      "workstation. If left blank, will guess based on the "
                      "system.")

        sf.par("NTASKMAX", required=False, default=100, par_type=int,
               docstr="Limit on the number of concurrent tasks in array")

        sf.par("NODESIZE", required=True, par_type=int,
               docstr="The number of cores per node defined by the system")

        sf.par("SLURMARGS", required=False, default="", par_type=str,
               docstr="Any optional, additional SLURM arguments that will be "
                      "passed to the SBATCH scripts")

        return sf

    def submit(self, submit_call=None):
        """
        Submits workflow as a single-process master job

        :type submit_call: str
        :param submit_call: subclasses (e.g., specific SLURM cluster
            subclasses) can overload the sbatch command line input by setting
            submit_call. If set to None, default submit_call will be set here.
        """
        if submit_call is None:
            submit_call = " ".join([
                "sbatch",
                f"{PAR.SLURMARGS or ''}",
                f"--job-name={PAR.TITLE}",
                f"--output={self.output_log}",
                f"--error={self.error_log}",
                f"--ntasks-per-node={PAR.NODESIZE}",
                "--nodes=1",
                f"--time={PAR.WALLTIME:.0f}",  # WALLTIME is a float (minutes)
                f"{os.path.join(ROOT_DIR, 'scripts', 'submit')}",
                f"--output {PATH.OUTPUT}"
            ])
        self.logger.debug(submit_call)
        super().submit(submit_call)

    def run(self, classname, method, single=False, run_call=None, **kwargs):
        """
        Runs task multiple times in embarrassingly parallel fashion on a SLURM
        cluster. Executes classname.method(*args, **kwargs) `NTASK` times,
        each time on `NPROC` CPU cores

        .. note::
            The actual CLI call structure looks something like this
            $ sbatch --args scripts/run OUTPUT class method environs

        :type classname: str
        :param classname: the class to run
        :type method: str
        :param method: the method from the given `classname` to run
        :type single: bool
        :param single: run a single-process, non-parallel task, such as
            smoothing the gradient, which only needs to be run once.
            This will change how the job array and the number of tasks is
            defined, such that the job is submitted as a single-core job to
            the system.
        :type run_call: str
        :param run_call: subclasses (e.g., specific SLURM cluster subclasses)
            can overload the sbatch command line input by setting run_call.
            If set to None, default run_call will be set here.
        """
        self.checkpoint(PATH.OUTPUT, classname, method, kwargs)

        # Default sbatch command line input, can be overloaded by subclasses
        # Copy-paste this default run_call and adjust accordingly for subclass
        if run_call is None:
            run_call = " ".join([
                "sbatch",
                f"{PAR.SLURMARGS or ''}",
                f"--job-name={PAR.TITLE}",
                f"--nodes={math.ceil(PAR.NPROC/float(PAR.NODESIZE)):d}",
                f"--ntasks-per-node={PAR.NODESIZE:d}",
                f"--ntasks={PAR.NPROC:d}",
                f"--time={PAR.TASKTIME:.0f}",  # TASKTIME is a float (minutes)
                f"--output={os.path.join(PATH.WORKDIR, 'logs', '%A_%a')}",
                f"--array=0-{PAR.NTASK-1}%{PAR.NTASKMAX}",
                f"{os.path.join(ROOT_DIR, 'scripts', 'run')}",
                f"--output {PATH.OUTPUT}",
                f"--classname {classname}",
                f"--funcname {method}",
                f"--environment {PAR.ENVIRONS or ''}"
            ])
        self.logger.debug(run_call)

        # Single-process jobs simply need to replace a few sbatch arguments.
        # Do it AFTER `run_call` has been defined so that subclasses submitting
        # custom run calls can still benefit from this
        if single:
            self.logger.info("replacing parts of sbatch run call for single "
                             "process job")
            for part in run_call.split(" "):
                if "--array" in part:
                    run_call = run_call.replace(part, "--array=0-0")
                elif "--ntasks" in part:
                    run_call = run_call.replace(part, "--ntasks=1")
            # Append taskid to environment variable, deal with the case where
            # PAR.ENVIRONS is an empty string
            task_id_str = "SEISFLOWS_TASKID=0"
            if not run_call.strip().endswith("--environment"):
                task_id_str = f",{task_id_str}"  # appending to the list of vars
            run_call += task_id_str
            self.logger.debug(run_call)

        # The standard response from SLURM when submitting jobs
        # is something like 'Submitted batch job 441636', we want job number
        stdout = subprocess.run(run_call, stdout=subprocess.PIPE,
                                text=True, shell=True).stdout
        job_ids = job_id_list(stdout, single)

        # Continuously check for job completion on ALL running array jobs
        is_done = False
        count = 0
        bad_states = ["TIMEOUT", "FAILED", "NODE_FAIL", "OUT_OF_MEMORY",
                      "CANCELLED"]
        while not is_done:
            # Wait a bit to avoid rapidly querying sacct
            time.sleep(5)
            is_done, states = job_array_status(job_ids)
            # EXIT CONDITION: if any of the jobs provide job failure codes
            if not is_done:
                for i, state in enumerate(states):
                    # Sometimes states can be something like 'CANCELLED+', so
                    # we can't do exact string matching, check partial matches
                    if any([check in state for check in bad_states]):
                        print(msg.cli((f"Stopping workflow for {state} job. "
                                       f"Please check log file for details."),
                                      items=[f"TASK: {classname}.{method}",
                                             f"TASK ID: {job_ids[i]}",
                                             f"LOG: logs/{job_ids[i]}",
                                             f"SBATCH: {run_call}"],
                                      header="slurm run error", border="="))
                        sys.exit(-1)
            # WAIT CONDITION: if sacct is not working, we'll get stuck in a loop
            if "UNDEFINED" in states:
                count += 1
                # Every 10 counts, warn the user this is unexpected behavior
                if not count % 10:
                    job_id = job_ids[states.index("UNDEFINED")]
                    self.logger.warning(f"SLURM command 'sacct {job_id}' has "
                                        f"returned unexpected response {count} "
                                        f"times. This job may have failed "
                                        f"unexpectedly. Consider checking "
                                        f"manually")

        self.logger.info(f"Task {classname}.{method} finished successfully")

    def taskid(self):
        """
        Provides a unique identifier for each running task

        :rtype: int
        :return: identifier for a given task
        """
        # If not set, this environment variable will return None
        sftaskid = os.getenv("SEISFLOWS_TASKID")
        if sftaskid is None:
            sftaskid = os.getenv("SLURM_ARRAY_TASK_ID")
            if sftaskid is None:
                print(msg.cli("system.taskid() environment variable not "
                              "found. Assuming DEBUG mode and returning "
                              "taskid==0. If not DEBUG mode, please check "
                              "SYSTEM.run()", header="warning", border="="))
                sftaskid = 0
        return int(sftaskid)
class Test(custom_import("workflow", "base")): """ This is a template Base class """ # Class-specific logger accessed using self.logger # When this logger is called, e.g., self.logger.info("text"), the logging # package will know exactly which module, class and function the log # statement has been sent from, extraordinarily helpful for debugging. logger = logging.getLogger(__name__).getChild(__qualname__) @property def required(self): """ A hard definition of paths and parameters required by this class, alongside their necessity for the class and their string explanations. :rtype: seisflows.config.SeisFlowsPathsParameters :return: Paths and parameters that define the given class """ sf = SeisFlowsPathsParameters(super().required) return sf def check(self, validate=True): """ Checks parameters and paths. The validate function ensures that all required paths and parameters are accounted for, and that all optional paths and parameters are set to user-defined or default values. :type validate: bool :param validate: set required paths and parameters into sys.modules """ # The validate statement is used internally to set required paths # and parameters into sys.modules. Default values are stored for # optional terms if validate: self.required.validate() def main(self, return_flow=False): """ This controls the main testing workflow """ FLOW = [self.test_system] if return_flow: return FLOW for func in FLOW: func() def test_function(self): """ A simple function that can be called by system.run() """ print(f"Hello world, from taskid {system.taskid()}") def test_system(self): """ This is an example test function which can take any number of args or kwargs. The base class is responsible for setting all of the necessary functions """ system.run(classname="workflow", method="test_function") # Wait a bit for system to catch up time.sleep(3) system.run(classname="workflow", method="test_function", single=True)
class Lsf(custom_import("system", "cluster")): """ An interface through which to submit workflows, run tasks in serial or parallel, and perform other system functions. By hiding environment details behind a python interface layer, these classes provide a consistent command set across different computing environments. Intermediate files are written to a global scratch path PATH.SCRATCH, which must be accessible to all compute nodes. Optionally, users can provide a local scratch path PATH.LOCAL if each compute node has its own local filesystem. For important additional information, please see http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration """ logger = logging.getLogger(__name__).getChild(__qualname__) def __init__(self): """ These parameters should not be set by the user. Attributes are initialized as NoneTypes for clarity and docstrings. """ super().__init__() @property def required(self): """ Checks parameters and paths """ sf = SeisFlowsPathsParameters(super().required) sf.par("MPIEXEC", required=False, default="mpiexec", par_type=str, docstr="Function used to invoke executables on the system. " "For example 'srun' on SLURM systems, or './' on a " "workstation. If left blank, will guess based on the " "system.") # Define the Parameters required by this module sf.par("NTASKMAX", required=False, default=100, par_type=int, docstr="Limit on the number of concurrent tasks in array") sf.par("NODESIZE", required=True, par_type=int, docstr="The number of cores per node defined by the system") sf.par("LSFARGS", required=False, default="", par_type=str, docstr="Any optional, additional LSG arguments that will be " "passed to the LSF submit scripts") def submit(self, workflow): """ Submits workflow """ # Prepare 'bsub' arguments submit_call = " ".join([ f"bsub", f"{PAR.LSFARGS}", f"-J {PAR.TITLE}", f"-o {self.output_log}.log", f"-e {self.error_log}.log", f"-n {PAR.NODESIZE}", f'-R "span[ptile={PAR.NODESIZE}"', f"-W {PAR.WALLTIME:d}:00", os.path.join(findpath("seisflows.system"), "wrappers", "submit"), PATH.OUTPUT ]) super().submit(workflow, submit_call) def run(self, classname, method, *args, **kwargs): """ Runs task multiple times in embarrassingly parallel fasion on the maui cluster Executes classname.method(*args, **kwargs) NTASK times, each time on NPROC CPU cores :type classname: str :param classname: the class to run :type method: str :param method: the method from the given `classname` to run """ # Checkpoint this individual method before proceeding self.checkpoint(PATH.OUTPUT, classname, method, args, kwargs) # Submit job array run_call = " ".join([ f"bsub", f"{PAR.LSFARGS}", f"-J {PAR.TITLE}", f"-n {PAR.NPROC}", f'-R "span[ptile={PAR.NODESIZE}"', f"-W {PAR.TASKTIME:d}:00", f"-o {os.path.join(PATH.WORKDIR, 'output.logs', '%J_%I')}", f"[1-{PAR.NTASK}] % {PAR.NTASKMAX}", f"{os.path.join(findpath('seisflows.system'), 'wrappers', 'run')}", f"{PATH.OUTPUT}", f"{classname}", f"{method}", f"{PAR.ENVIRONS}" ]) stdout = subprocess.check_output(run_call, shell=True) # keep track of job ids jobs = self.job_id_list(stdout, PAR.NTASK) while True: # Wait seconds before checking status again time.sleep(30) self.timestamp() isdone, jobs = self.job_status(classname, method, jobs) if isdone: return def run_single(self, classname, method, *args, **kwargs): """ Runs task multiple times in embarrassingly parallel fasion Executes classname.method(*args, **kwargs) NTASK times, each time on NPROC cpu cores """ # Checkpoint this individual method before proceeding 
self.checkpoint(PATH.OUTPUT, classname, method, args, kwargs) # Submit job array run_call = " ".join([ f"bsub", f"{PAR.LSFARGS}", f"-J {PAR.TITLE}", f"-n {PAR.NPROC}", f'-R "span[ptile={PAR.NODESIZE}"', f"-W {PAR.TASKTIME:d}:00", f"-o {os.path.join(PATH.WORKDIR, 'output.logs', '%J')}", f"[1-1]", f"{os.path.join(findpath('seisflows.system'), 'wrappers', 'run')}", f"{PATH.OUTPUT}", f"{classname}", f"{method}", f"{PAR.ENVIRONS}" ]) stdout = check_output(run_call, shell=True) # keep track of job ids jobs = self.job_id_list(stdout, ntask=1) while True: # Wait seconds before checking status again time.sleep(30) self.timestamp() isdone, jobs = self.job_status(classname, method, jobs) if isdone: return def job_id_list(self, stdout, ntask): """ Parses job id list from sbatch standard output :type stdout: str :param stdout: the output of subprocess.check_output() :type ntask: int :param ntask: number of tasks currently running """ job = stdout.split()[1].strip()[1:-1] if ntask == 1: return [job] else: number_jobs = range(1, PAR.NSRC + 1) return ["{job}[{}]".format(_) for _ in number_jobs] def job_status(self, classname, method, jobs): """ Queries completion status of a single job :type job: str :param job: job id to query """ job_finished = [] for job in jobs: state = self._query(job) if state == "DONE": job_finished.append(True) else: job_finished.append(False) if state == "EXIT": print( msg.cli( f"LSF job {job} failed to execute " f"{classname}.{method}.", header="error", border="=")) sys.exit(-1) isdone = all(job_finished) return isdone, jobs def _query(self, jobid): """ Retrives job state from LSF database :type jobid: str :param jobid: job id to query LSF system about """ # Write the job status output to a temporary file with open(os.path.join(PATH.SYSTEM, "job_status", "w")) as f: call('bjobs -a -d "{jobid}"', stdout=f) # Read the job status back from the text file with open(os.path.join(PATH.SYSTEM, "job_status", "r")) as f: lines = f.readlines() state = lines[1].split()[2].strip() return state def taskid(self): """ Provides a unique identifier for each running task """ return int(os.getenv('LSB_JOBINDEX')) - 1 def timestamp(self): """ Timestamp the current running job """ with open(os.path.join(PATH.SYSTEM, "timestamps", "a")) as f: f.write(time.strftime("%H:%M:%S")) f.write("\n") def save_kwargs(self, classname, method, kwargs): """ Save key word arguments as a pickle object. :type classname: str :param classname: the class to run :type method: str :param method: the method from the given `classname` to run """ kwargspath = os.path.join(PATH.OUTPUT, "kwargs") kwargsfile = os.path.join(kwargspath, f"{classname}_{method}.p") unix.mkdir(kwargspath) saveobj(kwargsfile, kwargs)
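# _query() above reads lines[1].split()[2] because 'bjobs -a' prints a header
# row followed by one row per job, with the job state in the third column.
# The text below is a made-up example used only to illustrate that indexing:
example_stdout = (
    "JOBID   USER   STAT  QUEUE   FROM_HOST  EXEC_HOST  JOB_NAME\n"
    "441636  user   DONE  normal  login1     node04     seisflows\n"
)
lines = example_stdout.splitlines()
assert lines[1].split()[2] == "DONE"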
class Pyatoa(custom_import("preprocess", "base")): """ Data preprocessing class using the Pyaflowa class within the Pyatoa package. In charge of data discovery, preprocessing, filtering, misfiti quantification and data storage. The User does not need to implement Pyatoa, but rather interacts with it via the parameters and paths of SeisFlows3. """ logger = logging.getLogger(__name__).getChild(__qualname__) def __init__(self): """ These parameters should not be set by __init__! Attributes are just initialized as NoneTypes for clarity and docstrings :param logger: Class-specific logging module, log statements pushed from this logger will be tagged by its specific module/classname """ pass @property def required(self): """ A hard definition of paths and parameters required by this class, alongside their necessity for the class and their string explanations. """ sf = SeisFlowsPathsParameters() # Define the Parameters required by this module sf.par("UNIT_OUTPUT", required=True, par_type=str, docstr="Data units. Must match the synthetic output of external " "solver. Available: ['DISP': displacement, " "'VEL': velocity, 'ACC': acceleration]") # TODO Check this against T0 in check() sf.par("START_PAD", required=False, default=0, par_type=float, docstr="For data gathering; time before origin time to gather. " "START_PAD >= T_0 in SPECFEM constants.h.in. " "Positive values only") # TODO set this automatically by setting equal NT * DT sf.par("END_PAD", required=True, par_type=float, docstr="For data gathering; time after origin time to gather. " "END_PAD >= NT * DT (of Par_file). Positive values only") sf.par("MIN_PERIOD", required=False, default="", par_type=float, docstr="Minimum filter corner in unit seconds. Bandpass filter " "if set with `MAX_PERIOD`, highpass filter if set " "without `MAX_PERIOD`, no filtering if not set and " "`MAX_PERIOD also not set") sf.par("MAX_PERIOD", required=False, default="", par_type=float, docstr="Maximum filter corner in unit seconds. Bandpass filter " "if set with `MIN_PERIOD`, lowpass filter if set " "without `MIN_PERIOD`, no filtering if not set and " "`MIN_PERIOD also not set") sf.par("CORNERS", required=False, default=4, par_type=int, docstr="Number of filter corners applied to filtering") sf.par("CLIENT", required=False, par_type=str, docstr="Client name for ObsPy FDSN data gathering. Pyatoa will " "attempt to collect waveform and metadata based on " "network and station codes provided in the SPECFEM " "STATIONS file. If set None, no FDSN gathering will be " "attempted") sf.par("ROTATE", required=False, default=False, par_type=bool, docstr="Attempt to rotate waveform components from NEZ -> RTZ") sf.par("PYFLEX_PRESET", required=False, default="default", par_type=str, docstr="Parameter map for misfit window configuration defined " "by Pyflex. IF None, misfit and adjoint sources will be " "calculated on whole traces. For available choices, " "see Pyatoa docs page (pyatoa.rtfd.io)") sf.par("FIX_WINDOWS", required=False, default=False, par_type="bool or str", docstr="How to address misfit window evaluation at each " "evaluation. Options to re-use misfit windows collected " "during an inversion, available options: " "[True, False, 'ITER', 'ONCE'] " "True: Re-use windows after first evaluation (i01s00); " "False: Calculate new windows each evaluation; " "'ITER': Calculate new windows at first evaluation of " "each iteration (e.g., i01s00... i02s00..." 
"'ONCE': Calculate new windows at first evaluation of " "the workflow, i.e., at PAR.BEGIN") sf.par("ADJ_SRC_TYPE", required=False, default="cc", par_type=str, docstr="Adjoint source type to evaluate misfit, defined by " "Pyadjoint. Currently available options: " "['cc': cross-correlation, 'mt': multitaper, " "wav: waveform']") sf.par("PLOT", required=False, default=True, par_type=bool, docstr="Attempt to plot waveforms and maps as PDF files at each " "function evaluation") sf.par("PYATOA_LOG_LEVEL", required=False, default="DEBUG", par_type=str, docstr="Log level to set Pyatoa, Pyflex, Pyadjoint. Available: " "['null': no logging, 'warning': warnings only, " "'info': task tracking, " "'debug': log all small details (recommended)]") # Parameters to control saving scratch/preprocess files to work dir. sf.par("SAVE_DATASETS", required=False, default=True, par_type=bool, docstr="Save PyASDF HDF5 datasets to disk. These datasets store " "waveform data, metadata, misfit windows, adjoint " "sources and configuration parameters") sf.par("SAVE_FIGURES", required=False, default=True, par_type=bool, docstr="Save output waveform figures to disk as PDFs") sf.par("SAVE_LOGS", required=False, default=True, par_type=bool, docstr="Save event-specific Pyatoa logs to disk as .txt files") # Define the Paths required by this module sf.path("PREPROCESS", required=False, default=os.path.join(PATH.SCRATCH, "preprocess"), docstr="scratch/ path to store waveform data and figures. " "Pyatoa will generate an internal directory structure " "here") sf.path("DATA", required=False, docstr="Directory to locally stored data. Pyatoa looks for " "waveform and metadata in the 'PATH.DATA/mseed' and " "'PATH.DATA/seed', directories respectively.") return sf def check(self, validate=True): """ Checks Parameter and Path files, will be run at the start of a Seisflows workflow to ensure that things are set appropriately. """ if validate: self.required.validate() # Check that other modules have set parameters that will be used here for required_parameter in ["COMPONENTS", "FORMAT"]: assert(required_parameter in PAR), \ f"Pyatoa requires {required_parameter}" assert(PAR.FORMAT.upper() == "ASCII"), \ "Pyatoa preprocess requires PAR.FORMAT=='ASCII'" assert((PAR.DT * PAR.NT) <= (PAR.START_PAD + PAR.END_PAD)), \ ("Pyatoa preprocess must have (PAR.START_PAD + PAR.END_PAD) >= " "(PAR.DT * PAR.NT), current values will not provide sufficiently " f"long data traces (DT*NT={PAR.DT * PAR.NT}; " f"START+END={PAR.START_PAD + PAR.END_PAD}") def setup(self): """ Sets up data preprocessing machinery by establishing an internally defined directory structure that will be used to store the outputs of the preprocessing workflow Akin to an __init__ class, but to be called externally by the workflow. """ unix.mkdir(PATH.PREPROCESS) def prepare_eval_grad(self, cwd, source_name, taskid, **kwargs): """ Prepare the gradient evaluation by gathering, preprocessing waveforms, and measuring misfit between observations and synthetics using Pyatoa. Reads in observed and synthetic waveforms, applies optional preprocessing, assesses misfit, and writes out adjoint sources and STATIONS_ADJOINT file. .. note:: Meant to be called by solver.eval_func(), may have unused arguments to keep functions general across preprocessing subclasses. :type cwd: str :param cwd: current specfem working directory containing observed and synthetic seismic data to be read and processed. 
Should be defined by solver.cwd :type source_name: str :param source_name: the event id to be used for tagging and data lookup. Should be defined by solver.source_name :type taskid: int :param taskid: identifier of the currently running solver instance. Should be defined by solver.taskid :type filenames: list of str :param filenames: [not used] list of filenames defining the files in traces """ if taskid == 0: self.logger.debug("preparing files for gradient evaluation with " "Pyaflowa") # Process all the stations for a given event using Pyaflowa pyaflowa = self.setup_event_pyaflowa(source_name) scaled_misfit = pyaflowa.process() if scaled_misfit is None: print(msg.cli(f"Event {source_name} returned no misfit, you may " f"want to check logs and waveform figures, " f"or consider discarding this event from your " f"workflow", items=[pyaflowa.paths.logs, pyaflowa.paths.figures], header="pyatoa preprocessing error", border="=")) sys.exit(-1) # Event misfit defined by Tape et al. (2010) written to solver dir. self.write_residuals(path=cwd, scaled_misfit=scaled_misfit) def setup_event_pyaflowa(self, source_name=None): """ A convenience function to set up a Pyaflowa processing instance for a specific event. .. note:: This is meant to be called by preprocess.prepare_eval_grad() but its also useful for debugging and manual processing where you can simply return a formatted Pyaflowa object and debug it directly. :type source_name: str :param source_name: solver source name to evaluate setup for. Must match from list defined by: solver.source_names """ # Late import because preprocess is loaded before optimize, # Optimize required to know which iteration/step_count we are at solver = sys.modules["seisflows_solver"] optimize = sys.modules["seisflows_optimize"] iteration = optimize.iter if source_name is None: source_name = solver.source_names[0] # Deal with the migration case where no step count given try: step_count = optimize.line_search.step_count except AttributeError: step_count = "" # Outsource data processing to an event-specfic Pyaflowa instance pyaflowa = Pyaflowa(sfpar=PAR, sfpath=PATH) pyaflowa.setup(source_name=source_name, iteration=iteration, step_count=step_count, loc="*", cha="*") return pyaflowa def finalize(self): """ Run some serial finalization tasks specific to Pyatoa, which will help aggregate the collection of output information. .. 
note:: This finalize function performs the following tasks: * Generate .csv files using the Inspector * Aggregate event-specific PDFs into a single evaluation PDF * Save scratch/ data into output/ if requested """ # Initiate Pyaflowa to get access to path structure pyaflowa = Pyaflowa(sfpar=PAR, sfpath=PATH) unix.cd(pyaflowa.paths.datasets) # Generate the Inspector from existing datasets and save to disk # Allow this is fail, which might happen if we don't have enough data # or the Dataset is not formatted as expected insp = Inspector(PAR.TITLE, verbose=False) try: insp.discover() insp.save() except Exception as e: self.logger.warning(f"Uncontrolled exception in Inspector creation " f"will not create inspector:\n{e}") # Make the final PDF for easier User ingestion of waveform/map figures pyaflowa.make_evaluation_composite_pdf() # Move scratch/ directory results into more permanent storage if PAR.SAVE_DATASETS: datasets = glob(os.path.join(pyaflowa.paths.datasets, "*.h5")) self._save_quantity(datasets, tag="datasets") if PAR.SAVE_FIGURES: figures = glob(os.path.join(pyaflowa.paths.figures, "*.pdf")) self._save_quantity(figures, tag="figures") if PAR.SAVE_LOGS: logs = glob(os.path.join(pyaflowa.paths.logs, "*.txt")) path_out = os.path.join(PATH.WORKDIR, CFGPATHS.LOGDIR) self._save_quantity(logs, path_out=path_out) def _save_quantity(self, filepaths, tag="", path_out=""): """ Repeatable convenience function to save quantities from the scratch/ directory to the output/ directory :type filepaths: list :param filepaths: full path to files that should be saved to output/ :type tag: str :param tag: tag for saving the files in PATH.OUTPUT. If not given, will save directly into the output/ directory :type path_out: str :param path_out: overwrite the default output path file naming """ if not path_out: path_out = os.path.join(PATH.OUTPUT, tag) if not os.path.exists(path_out): unix.mkdir(path_out) for src in filepaths: dst = os.path.join(path_out, os.path.basename(src)) unix.cp(src, dst) def write_residuals(self, path, scaled_misfit): """ Computes residuals and saves them to a text file in the appropriate path :type path: str :param path: scratch directory path, e.g. PATH.GRAD or PATH.FUNC :type scaled_misfit: float :param scaled_misfit: the summation of misfit from each source-receiver pair calculated by prepare_eval_grad() :type source_name: str :param source_name: name of the source related to the misfit, used for file naming """ residuals_file = os.path.join(path, "residuals") np.savetxt(residuals_file, [scaled_misfit], fmt="%11.6e") def sum_residuals(self, files): """ Averages the event misfits and returns the total misfit. Total misfit defined by Tape et al. (2010) :type files: str :param files: list of single-column text files containing residuals that will have been generated using prepare_eval_grad() :rtype: float :return: average misfit """ if len(files) != PAR.NTASK: print(msg.cli(f"Pyatoa preprocessing module did not recover the " f"correct number of residual files " f"({len(files)}/{PAR.NTASK}). Please check that " f"the preprocessing logs", header="error") ) sys.exit(-1) total_misfit = 0 for filename in files: total_misfit += np.sum(np.loadtxt(filename)) total_misfit /= PAR.NTASK return total_misfit
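# sum_residuals() above averages the per-event misfits (Tape et al., 2010):
# each residuals file holds one scaled event misfit, and the total is their
# sum divided by PAR.NTASK. A worked example with made-up numbers, where each
# list entry stands in for the contents of one residuals file:
import numpy as np

event_misfits = [0.42, 0.37, 0.51, 0.46]
total_misfit = sum(np.sum(m) for m in event_misfits) / len(event_misfits)
# total_misfit -> 0.44 (average event misfit)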
class NLCG(custom_import("optimize", "base")): """ Nonlinear conjugate gradient method Optimization Variables: m: model f: objective function value g: gradient direction p: search direction Line Search Variables: x: list of step lenths from current line search f: correpsonding list of function values m: number of step lengths in current line search n: number of model updates in optimization problem gtg: dot product of gradient with itself gtp: dot product of gradient and search direction Status codes status > 0 : finished status == 0 : not finished status < 0 : failed """ # Class-specific logger accessed using self.logger logger = logging.getLogger(__name__).getChild(__qualname__) def __init__(self): """ These parameters should not be set by the user. Attributes are initialized as NoneTypes for clarity and docstrings. :type NLCG_iter: Class :param NLCG_iter: an internally used iteration that differs from optimization iter. Keeps track of internal NLCG memory. """ super().__init__() self.NLCG_iter = 0 self.calc_beta = pollak_ribere # !!! Allow the user to choose this fx? @property def required(self): """ A hard definition of paths and parameters required by this class, alongside their necessity for the class and their string explanations. """ sf = SeisFlowsPathsParameters(super().required) # Define the Parameters required by this module sf.par("NLCGMAX", required=False, default="null", par_type=float, docstr="NLCG periodic restart interval, between 1 and inf") sf.par("NLCGTHRESH", required=False, default="null", par_type=float, docstr="NLCG conjugacy restart threshold, between 1 and inf") return sf def check(self, validate=True): """ Checks parameters, paths, and dependencies """ if validate: self.required.validate() super().check(validate=False) assert(PAR.LINESEARCH.upper() == "BRACKET"), \ f"NLCG requires a bracketing line search algorithm" def compute_direction(self): """ Compute search direction using the Nonlinear Conjugate Gradient method The potential outcomes when computing direction with NLCG 1. First iteration of an NLCG optimization, search direction is the inverse gradient 2. NLCG internal iteration ticks over the maximum allowable number of iterations, force a restart condition, search direction is the inverse gradient 3. New NLCG search direction does not have conjugacy with previous search direction, force restart, inverse gradient search direction 4. New NLCG search direction is not a descent direction, force restart, inverse gradient search direction 5. New NLCG search direction has conjugacy and is a descent direction and is set as the new search direction. """ self.logger.debug(f"computing search direction with NLCG") self.NLCG_iter += 1 unix.cd(PATH.OPTIMIZE) # Load the current gradient direction g_new = self.load(self.g_new) # CASE 1: If first iteration, search direction is the current gradient if self.NLCG_iter == 1: self.logger.info("first NLCG iteration, setting search direction" "as inverse gradient") p_new = -g_new restarted = 0 # CASE 2: Force restart if the iterations have surpassed the maximum # number of allowable iter elif self.NLCG_iter > PAR.NLCGMAX: logger.info("restarting NLCG due to periodic restart condition. " "setting search direction as inverse gradient") self.restart() p_new = -g_new restarted = 1 # Normal NLCG direction compuitation else: # Compute search direction g_old = self.load(self.g_old) p_old = self.load(self.p_old) # Apply preconditioner and calc. scale factor for search dir. 
(beta) if self.precond: beta = self.calc_beta(g_new, g_old, self.precond) p_new = -self.precond(g_new) + beta * p_old else: beta = self.calc_beta(g_new, g_old) p_new = -g_new + beta * p_old # Check restart conditions, return search direction and status if check_conjugacy(g_new, g_old) > PAR.NLCGTHRESH: self.logger.info("restarting NLCG due to loss of conjugacy") self.restart() p_new = -g_new restarted = 1 elif check_descent(p_new, g_new) > 0.: self.logger.info("restarting NLCG, not a descent direction") self.restart() p_new = -g_new restarted = 1 else: p_new = p_new restarted = 0 # Save values to disk and memory self.save(self.p_new, p_new) self.restarted = restarted def restart(self): """ Overwrite the Base restart class and include a restart of the NLCG """ super().restart() self.NLCG_iter = 1
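# compute_direction() above delegates the scale factor to `pollak_ribere`.
# A minimal numpy sketch of the standard (unpreconditioned) Polak-Ribiere
# formula, beta = g_new . (g_new - g_old) / (g_old . g_old); the module's own
# helper may differ in detail (e.g., how it handles the preconditioner):
import numpy as np

def polak_ribiere_sketch(g_new, g_old):
    beta = np.dot(g_new, g_new - g_old) / np.dot(g_old, g_old)
    return max(beta, 0.)   # common "PR+" safeguard against negative beta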
class ThriftyInversion(custom_import("workflow", "inversion")): """ Thrifty inversion which attempts to save resources by re-using previous line search results for the current iteration. """ # Class-specific logger accessed using self.logger logger = logging.getLogger(__name__).getChild(__qualname__) def __init__(self): """ :type thrifty: bool :param thrifty: the current status of the inversion. if False: assumed to be first iteration, a restart, or some other condition has been met which means inversion is defaulting to normal behavior if True: A well-scaled inversion can skip the function evaluation of the next iteration by using the line search results of the previous iteration """ super().__init__() self.thrifty = False def check(self, validate=True): """ Checks parameters and paths """ super().check(validate=False) if validate: self.required.validate() assert PAR.LINESEARCH == "Backtrack", \ "Thrifty inversion requires backtracking line search" def initialize(self): """ If line search can be carried over, skip initialization step Or if manually starting a new run, start with normal inversion init """ if not self.thrifty or optimize.iter == PAR.BEGIN: super().initialize() else: self.logger.info(msg.mjr("INITIALIZING THRIFTY INVERSION")) def clean(self): """ Determine if forward simulation from line search can be carried over. We assume clean() is the final flow() argument so that we can update the thrifty status here. """ self.update_status() if self.thrifty: self.logger.info( msg.mnr("THRIFTY CLEANING WORKDIR FOR NEXT " "ITERATION")) unix.rm(PATH.GRAD) unix.mv(PATH.FUNC, PATH.GRAD) unix.mkdir(PATH.FUNC) else: super().clean() def update_status(self): """ Determine if line search forward simulation can be carried over based on a number of criteria """ self.logger.info("updating thrifty inversion status") if optimize.iter == PAR.BEGIN: self.logger.info("1st iteration, defaulting to inversion workflow") thrifty = False elif optimize.restarted: self.logger.info("optimization has been restarted, defaulting to " "inversion workflow") thrifty = False elif optimize.iter == PAR.END: self.logger.info( "final iteration, defaulting to inversion workflow") thrifty = False else: self.logger.info("continuing with thrifty inversion workflow") thrifty = True self.thrifty = thrifty
class Workstation(custom_import("system", "base")): """ Run tasks in a serial fashion on a single local machine """ logger = logging.getLogger(__name__).getChild(__qualname__) @property def required(self): """ A hard definition of paths and parameters required by this class, alongside their necessity for the class and their string explanations. """ sf = SeisFlowsPathsParameters(super().required) sf.par("MPIEXEC", required=False, default=None, par_type=str, docstr="Function used to invoke executables on the system. " "For example 'srun' on SLURM systems, or './' on a " "workstation. If left blank, will guess based on the " "system.") sf.par("NTASK", required=False, default=1, par_type=int, docstr="Number of separate, individual tasks. Also equal to " "the number of desired sources in workflow") sf.par("NPROC", required=False, default=1, par_type=int, docstr="Number of processor to use for each simulation") return sf def check(self, validate=True): """ Checks parameters and paths """ super().check(validate=False) if validate: self.required.validate() def submit(self): """ Submits the main workflow job """ self.setup() workflow = sys.modules["seisflows_workflow"] workflow.checkpoint() workflow.main() def run(self, classname, method, single=False, **kwargs): """ Executes task multiple times in serial. .. note:: kwargs will be passed to the underlying `method` that is called :type classname: str :param classname: the class to run :type method: str :param method: the method from the given `classname` to run :type single: bool :param single: run a single-process, non-parallel task, such as smoothing the gradient, which only needs to be run by once. This will change how the job array and the number of tasks is defined, such that the job is submitted as a single-core job to the system. """ self.checkpoint(PATH.OUTPUT, classname, method, kwargs) # Allows dynamic retrieval of any function from within package, e.g., # <bound method Base.eval_func of <seisflows3.solver.specfem2d... class_module = sys.modules[f"seisflows_{classname}"] function = getattr(class_module, method) if single: ntasks = 1 else: ntasks = PAR.NTASK for taskid in range(ntasks): # os environment variables can only be strings, these need to be # converted back to integers by system.taskid() os.environ["SEISFLOWS_TASKID"] = str(taskid) if taskid == 0: self.logger.info(f"running task {classname}.{method} " f"{PAR.NTASK} times") function(**kwargs) def taskid(self): """ Provides a unique identifier for each running task, which should be set by the 'run'' command. :rtype: int :return: returns the os environment variable SEISFLOWS_TASKID which is set by run() to label each of the currently running processes on the SYSTEM. """ sftaskid = os.getenv("SEISFLOWS_TASKID") if sftaskid is None: print( msg.cli( "system.taskid() environment variable not found. " "Assuming DEBUG mode and returning taskid==0. " "If not DEBUG mode, please check SYSTEM.run()", header="warning", border="=")) sftaskid = 0 return int(sftaskid)
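# run() and taskid() above communicate through the SEISFLOWS_TASKID
# environment variable, which can only be stored as a string and must be cast
# back to an integer. A minimal sketch of that round trip outside SeisFlows3:
import os

for taskid in range(3):                       # serial loop stands in for run()
    os.environ["SEISFLOWS_TASKID"] = str(taskid)
    assert int(os.getenv("SEISFLOWS_TASKID")) == taskid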
class Chinook(custom_import("system", "slurm")):
    """
    System interface for the University of Alaska HPC Chinook, which operates
    on a SLURM system.
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized as NoneTypes for clarity and docstrings.

        :type partitions: dict
        :param partitions: Chinook has various partitions which each have
            their own number of cores per compute node, defined here
        """
        super().__init__()
        self.partitions = {"debug": 24, "t1small": 28, "t2small": 28,
                           "t1standard": 40, "t2standard": 40, "analysis": 28}

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        sf.par("PARTITION", required=False, default="t1small", par_type=str,
               docstr="Name of partition on main cluster, available: "
                      "analysis, t1small, t2small, t1standard, t2standard")

        sf.par("MPIEXEC", required=False, default="srun", par_type=str,
               docstr="Function used to invoke parallel executables")

        return sf

    def check(self, validate=True):
        """
        Checks parameters and paths
        """
        if validate:
            self.required.validate()
        super().check(validate=False)

        assert(PAR.PARTITION in self.partitions.keys()), \
            f"Chinook partition must be in {self.partitions.keys()}"

        assert(PAR.NODESIZE == self.partitions[PAR.PARTITION]), \
            (f"PARTITION {PAR.PARTITION} is expected to have NODESIZE="
             f"{self.partitions[PAR.PARTITION]}, not the current "
             f"{PAR.NODESIZE}")
class Migration(custom_import("workflow", "base")): """ Migration base class. Performs the workflow of an inversion up to the postprocessing. In the terminology of seismic exploration, implements a 'reverse time migration'. """ # Class-specific logger accessed using self.logger logger = logging.getLogger(__name__).getChild(__qualname__) def __init__(self): """ These parameters should not be set by the user. Attributes are initialized as NoneTypes for clarity and docstrings. """ super().__init__() @property def required(self): """ A hard definition of paths and parameters required by this class, alongside their necessity for the class and their string explanations. """ sf = SeisFlowsPathsParameters(super().required) return sf def main(self, return_flow=False): """s Migrates seismic data to generate sensitivity kernels :type return_flow: bool :param return_flow: for CLI tool, simply returns the flow function rather than running the workflow. Used for print statements etc. """ flow = ( self.setup, self.generate_synthetics, self.backproject, self.process_kernels, self.finalize, ) if return_flow: return flow # Allow workflow resume from and stop after given flow functions start, stop = self.check_stop_resume_cond(flow) # Run each argument in flow self.logger.info(msg.mjr("STARTING MIGRATION WORKFLOW")) for func in flow[start:stop]: func() self.logger.info(msg.mjr("FINISHED MIGRATION WORKFLOW")) def setup(self): """ Sets up the SeisFlows3 modules for the Migration """ # Set up all the requisite modules from the master job self.logger.info(msg.mnr("PERFORMING MODULE SETUP")) preprocess.setup() postprocess.setup() system.run("solver", "setup") def generate_synthetics(self): """ Performs forward simulation, and evaluates the objective function """ self.logger.info(msg.sub("PREPARING VELOCITY MODEL")) src = os.path.join(PATH.OUTPUT, "model_init") dst = os.path.join(PATH.SCRATCH, "model") assert os.path.exists(src) unix.cp(src, dst) self.logger.info(msg.sub("EVALUATE OBJECTIVE FUNCTION")) system.run("solver", "eval_func", path=PATH.SCRATCH, write_residuals=True) def backproject(self): """ Backproject or create kernels by running adjoint simulations """ self.logger.info(msg.sub("BACKPROJECT / EVALUATE GRADIENT")) system.run("solver", "eval_grad", path=PATH.SCRATCH, export_traces=PAR.SAVETRACES) def process_kernels(self): """ Backproject to create kernels from synthetics """ system.run("postprocess", "process_kernels", single=True, path=os.path.join(PATH.SCRATCH, "kernels"), parameters=solver.parameters) try: # TODO Figure out a better method for running this try except system.run("postprocess", "process_kernels", single=True, path=os.path.join(PATH.SCRATCH, "kernels"), parameters=["rhop"]) except: pass def finalize(self): """ Saves results from current model update iteration """ self.logger.info(msg.mnr("FINALIZING MIGRATION WORKFLOW")) if PAR.SAVETRACES: self.save_traces() if PAR.SAVEKERNELS: self.save_kernels() else: self.save_kernels_sum() def save_kernels_sum(self): """ Same summed kernels into the output directory """ src = os.path.join(PATH.SCRATCH, "kernels", "sum") dst = os.path.join(PATH.OUTPUT, "kernels") unix.mkdir(dst) unix.cp(src, dst) def save_kernels(self): """ Save individual kernels into the output directory """ src = os.path.join(PATH.SCRATCH, "kernels") dst = PATH.OUTPUT unix.mkdir(dst) unix.cp(src, dst) def save_traces(self): """ Save waveform traces into the output directory """ src = os.path.join(PATH.SCRATCH, "traces") dst = PATH.OUTPUT unix.cp(src, dst)
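# Workflow classes such as Migration expose their flow as a tuple so that a
# command line tool can list the available start/stop points without running
# anything (the `return_flow=True` path of main()). A self-contained sketch
# of that convention using stand-in functions; the real entries come from the
# `flow` tuple defined in Migration.main():
def setup(): ...
def generate_synthetics(): ...
def backproject(): ...

flow = (setup, generate_synthetics, backproject)
for i, func in enumerate(flow, start=1):
    print(i, func.__name__)   # what return_flow=True lets a CLI print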
class ThriftyMaui(custom_import("workflow", "thrifty_inversion")):
    """
    Waveform thrifty inversion class specifically for running jobs on the
    New Zealand HPC cluster Maui.

    On Maui, Anaconda is only available on an ancillary cluster, Maui_ancil,
    so jobs involving the preprocessing module must be called through a
    separate system run call.
    """
    def check(self):
        """
        Ensure that the correct submodules are specified, otherwise this
        workflow won't function properly.
        """
        super().check()

        if "MAUI" not in PAR.SYSTEM.upper():
            raise ParameterError()

        if "MAUI" not in PAR.SOLVER.upper():
            raise ParameterError()

    def setup(self):
        """
        Lays groundwork for inversion by running setup() functions for the
        involved sub-modules, generating synthetic true data if necessary,
        and generating the pre-requisite database files.

        Should only be run once, at iteration 1
        """
        # Set up all the requisite modules
        print("SETUP")
        preprocess.setup()
        postprocess.setup()
        optimize.setup()

        # Run the setup in serial to reduce unnecessary job submissions
        # Needs to be split up into multiple system calls
        solver.initialize_solver_directories()

        if PAR.CASE.upper() == "SYNTHETIC":
            system.run("solver", "setup", single=True, model="true")
            system.run("solver", "generate_data")

        system.run("solver", "setup", single=True, model="init")

    def evaluate_function(self, path, suffix):
        """
        Performs forward simulation, and evaluates the objective function.

        Differs from Inversion.evaluate_function() as it splits the forward
        problem and misfit quantification into two separate system calls,
        rather than a single system call.

        :type path: str
        :param path: path in the scratch directory to use for I/O
        :type suffix: str
        :param suffix: suffix to use for I/O
        """
        print("EVALUATE FUNCTION\n\tRunning forward simulation")
        self.write_model(path=path, suffix=suffix)
        system.run("solver", "eval_fwd", path=path)

        print("\tEvaluating misfit")
        system.run_ancil("solver", "eval_misfit", path=path)
        self.write_misfit(path=path, suffix=suffix)
class Specfem3DGlobe(custom_import("solver", "specfem3d")):
    """
    Python interface to Specfem3D Globe. This subclass inherits functions
    from seisflows3.solver.specfem3d.Specfem3D

    !!! See base class for method descriptions !!!
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized as NoneTypes for clarity and docstrings.

        :type logger: Logger
        :param logger: Class-specific logging module, log statements pushed
            from this logger will be tagged by its specific module/classname
        """
        super().__init__()

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        return sf

    def load(self, path, prefix="reg1_", suffix="", parameters=None):
        """
        Reads SPECFEM model or kernel

        Models are stored in Fortran binary format and separated into
        multiple files according to material parameter and processor rank.

        :type path: str
        :param path: directory from which model is read
        :type prefix: str
        :param prefix: optional filename prefix
        :type suffix: str
        :param suffix: optional filename suffix, eg '_kernel'
        :type parameters: list
        :param parameters: material parameters to be read
            (if empty, defaults to self.parameters)
        :rtype: dict
        :return: model or kernels indexed by material parameter and
            processor rank, ie dict[parameter][iproc]
        """
        parameters = parameters or self.parameters

        model = Model(parameters)
        minmax = Minmax(parameters)

        for iproc in range(self.mesh_properties.nproc):
            # read database files based on parameters
            keys, vals = loadbypar(path, parameters, iproc, prefix, suffix)
            for key, val in zip(keys, vals):
                model[key] += [val]

            minmax.update(keys, vals)

        return model

    def save(self, path, model, prefix="reg1_", suffix=""):
        """
        Writes SPECFEM3D_GLOBE transversely isotropic model

        :type path: str
        :param path: directory to which the model is written
        :type model: dict
        :param model: model to be written, indexed by parameter and rank
        :type prefix: str
        :param prefix: prefix that begins the name of the model parameters
        :type suffix: str
        :param suffix: suffix that follows the name of model parameters
        """
        unix.mkdir(path)

        for iproc in range(self.mesh_properties.nproc):
            for key in ["vpv", "vph", "vsv", "vsh", "eta"]:
                if key in self.parameters:
                    savebin(model[key][iproc], path, iproc,
                            prefix + key + suffix)
                elif 'kernel' in suffix:
                    pass
                else:
                    src = os.path.join(PATH.OUTPUT, "model_init")
                    dst = path
                    copybin(src, dst, iproc, prefix + key + suffix)

            if 'rho' in self.parameters:
                savebin(model['rho'][iproc], path, iproc,
                        prefix + 'rho' + suffix)
            elif 'kernel' in suffix:
                pass
            else:
                src = os.path.join(PATH.OUTPUT, "model_init")
                dst = path
                copybin(src, dst, iproc, prefix + 'rho' + suffix)

    def check_mesh_properties(self, path=None, parameters=None):
        """
        Determine if Mesh properties are okay for workflow

        :type path: str
        :param path: path to the mesh file
        """
        if not hasattr(self, '_mesh_properties'):
            if path is None:
                path = PATH.MODEL_INIT
            if parameters is None:
                parameters = self.parameters

            nproc = 0
            ngll = []
            while True:
                dummy = loadbin(path, nproc, 'reg1_' + parameters[0])
                ngll += [len(dummy)]
                nproc += 1
                if not exists(os.path.join(
                        path, f"proc{nproc}_reg1_{parameters[0]}.bin")):
                    break

            self._mesh_properties = Struct([['nproc', nproc],
                                            ['ngll', ngll]])

    def rename_data(self):
        """
        Works around conflicting data filename conventions

        Specfem3D uses different name conventions for regular traces and
        'adjoint' traces
        """
        files = glob(os.path.join(self.cwd, "traces", "adj", "*sem.ascii"))
        unix.rename("sem.ascii", "sem.ascii.adj", files)

    def initialize_adjoint_traces(self):
        """
        Setup utility: Creates the "adjoint traces" expected by SPECFEM

        !!! This probably doesn't work

        Note:
            Adjoint traces are initialized by writing zeros for all channels.
            Channels actually in use during an inversion or migration will be
            overwritten with nonzero values later on.
        """
        super().initialize_adjoint_traces()

        # workaround for SPECFEM's use of different name conventions for
        # regular traces and 'adjoint' traces
        if PAR.FORMAT.upper() == "ASCII":
            files = glob(os.path.join(self.cwd, "traces", "adj",
                                      "*sem.ascii"))
            unix.rename("sem.ascii", "adj", files)

    @property
    def data_wildcard(self):
        """
        Returns a wildcard identifier for synthetic data

        :rtype: str
        :return: wildcard identifier for channels
        """
        if PAR.FORMAT.upper() == "ASCII":
            return "*.?X?.sem.ascii"

    @property
    def data_filenames(self):
        """
        Returns the filenames of all data, either by the requested components
        or by all available files in the directory.

        :rtype: list
        :return: list of data filenames
        """
        unix.cd(os.path.join(self.cwd, "traces", "obs"))

        if PAR.FORMAT.upper() == "ASCII":
            return sorted(glob("*.???.sem.ascii"))
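# data_filenames above globs "*.???.sem.ascii" inside traces/obs. The names
# below are hypothetical SPECFEM3D_GLOBE traces used only to show which files
# the pattern keeps (adjoint-renamed '.adj' files are excluded):
from fnmatch import fnmatch

traces = ["NZ.BFZ.MXZ.sem.ascii", "NZ.BFZ.MXN.sem.ascii",
          "NZ.BFZ.MXZ.sem.ascii.adj"]
print(sorted(t for t in traces if fnmatch(t, "*.???.sem.ascii")))
# ['NZ.BFZ.MXN.sem.ascii', 'NZ.BFZ.MXZ.sem.ascii']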