Example #1
def test_import(sfinit, modules):
    """
    Test code by importing all available classes for this module.
    If any of these fails then the module itself has some code error
    (e.g., syntax errors, inheritance errors).
    """
    sfinit
    for package, module_list in modules.items():
        for module in module_list:
            config.custom_import(MODULE, module)()
Example #2
def test_setup(sfinit, modules):
    """
    Test the expected behavior of each of the required functions.

    Setup: make sure that setup creates the necessary directory structure

    :param sfinit:
    :param modules:
    :return:
    """
    return
    sf = sfinit
    PATH = sys.modules["seisflows_paths"]
    SETUP_CREATES = [PATH.SCRATCH, PATH.SYSTEM, PATH.OUTPUT]

    for package, module_list in modules.items():
        for module in module_list:
            loaded_module = config.custom_import(MODULE, module)()

            # Make sure these don't already exist
            for path_ in SETUP_CREATES:
                assert(not os.path.exists(path_))

            loaded_module.setup()

            # Check that the minimum required directories were created
            for path_ in SETUP_CREATES:
                assert(os.path.exists(path_))

            # Remove created paths so we can check the next module
            for path_ in SETUP_CREATES:
                if os.path.isdir(path_):
                    shutil.rmtree(path_)
                else:
                    os.remove(path_)
Example #3
class Subclass(custom_import("MODULE NAME HERE", "PARENT CLASS NAME HERE")):
    """
    This is a template subclass
    """
    # Class-specific logger accessed using self.logger
    # When this logger is called, e.g., self.logger.info("text"), the logging
    # package will know exactly which module, class and function the log
    # statement has been sent from, extraordinarily helpful for debugging.
    logger = logging.getLogger(__name__).getChild(__qualname__)

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.

        :rtype: seisflows.config.SeisFlowsPathsParameters
        :return: Paths and parameters that define the given class

        """
        # The super().required argument ensures that the subclass inherits the
        # paths and parameters defined by its parent class
        sf = SeisFlowsPathsParameters(super().required)

        # > Additional or overloading paths and parameters can be set here

        return sf

    def check(self, validate=True):
        """
        Checks parameters and paths. The validate function ensures that all
        required paths and parameters are accounted for, and that all
        optional paths and parameters are set to user-defined or default values.
        """
        if validate:
            self.required.validate()

        # Validation only required by the lowest subclass, which will validate
        # all the paths and parameters from each of its parent classes
        super().check(validate=False)

    def test(self, *args, **kwargs):
        """
        This is an example OVERWRITE of the base_class.test() function.
        If a super() statement is used, all the code within the base class
        will be run.
        """
        # The super() statement calls the code chunk in base_class.test()
        # Here it will be executed before the remainder of sub_class.test() is
        # executed
        super().test()

        # Multiple logging levels determine how verbose the module will be
        self.logger.info("important log statement goes here")
        self.logger.debug("debugging log statement goes here")
        self.logger.warning("warnings can be passed here")
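For reference, a minimal sketch of how a concrete subclass might fill in the required property of the template above. The parameter name EXAMPLE_PAR and its docstring are purely hypothetical; the sf.par() call signature mirrors the one used by the real classes shown further below.

    @property
    def required(self):
        """
        Paths and parameters required by this hypothetical subclass, built on
        top of everything already required by the parent class
        """
        sf = SeisFlowsPathsParameters(super().required)

        # Register one additional, optional parameter (hypothetical name)
        sf.par("EXAMPLE_PAR", required=False, default=1, par_type=int,
               docstr="A hypothetical parameter registered by this subclass")

        return sf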
Example #4
def test_required_functions_exist(sfinit, modules):
    """
    Make sure that the named, required functions exist within the class
    Do not execute, just make sure they're defined, because they will be
    expected by other modules
    """
    sfinit
    for package, module_list in modules.items():
        for module in module_list:
            loaded_module = config.custom_import(MODULE, module)()
            for func in REQUIRED_FUNCTIONS:
                assert(func in dir(loaded_module)), \
                    f"'{func}' is a required function in module: " \
                    f"{MODULE}.{module}"
Example #5
def test_required_parameters_exist(sfinit, modules):
    """
    Ensure that the required parameters are set in all the classes/subclasses
    That is, that the parameters defined above in REQUIRED_PARAMETERS have been
    defined by each SYSTEM class
    """
    sfinit
    for package, module_list in modules.items():
        for module in module_list:
            loaded_module = config.custom_import(MODULE, module)()
            sf_pp = loaded_module.required
            # Check that required parameters are set
            for req_par in REQUIRED_PARAMETERS:
                assert(req_par in sf_pp.parameters.keys()), \
                    f"{req_par} is a required parameter for module {MODULE}"
Example #6
def test_required_functions_exist(sfinit):
    """
    Make sure that the named, required functions exist within the class
    Do not execute, just make sure they're defined, because they will be
    expected by other modules
    """
    sfinit
    for name in config.NAMES:
        for package, module_list in return_modules()[name].items():
            for module in module_list:
                loaded_module = config.custom_import(name, module)()
                # Check that required parameters are set
                for func in required_structure[name]["functions"]:
                    assert(func in dir(loaded_module)), \
                        f"{func} is a required function for module: " \
                        f"{name}.{module}"
Example #7
def test_required_parameters_exist(sfinit):
    """
    Ensure that the required parameters are set in all the classes/subclasses
    That is, that the parameters defined above in REQUIRED_PARAMETERS have been
    defined by each SYSTEM class
    """
    sfinit
    for name in config.NAMES:
        for package, module_list in return_modules()[name].items():
            for module in module_list:
                loaded_module = config.custom_import(name, module)()
                sf_pp = loaded_module.required
                # Check that required parameters are set
                for req_par in required_structure[name]["parameters"]:
                    assert(req_par in sf_pp.parameters.keys()), \
                        f"{req_par} is a required parameter for module: " \
                        f"{name}.{module}"
Example #8
def test_validate(sfinit, modules):
    """
    Test out path and parameter validation, essentially checking that all
    the paths and parameters are set properly

    .. note::
        This doesn't work because we have required parameters that are not
        set in the default parameter file. We can run configure beforehand
        but does that make sense?
    :return:
    """
    return
    sfinit
    for package, module_list in modules.items():
        for module in module_list:
            loaded_module = config.custom_import(MODULE, module)()
            from IPython import embed
            embed()
            loaded_module.required.validate()
Example #9
def test_custom_import(sfinit):
    """
    Test that importing based on internal modules works for various inputs
    :return:
    """
    sfinit
    with pytest.raises(SystemExit):
        config.custom_import()
    with pytest.raises(SystemExit):
        config.custom_import(name="NOT A VALID NAME")

    module = config.custom_import(name="optimize", module="LBFGS")
    assert (module.__name__ == "LBFGS")
    assert (module.__module__ == "seisflows3.optimize.LBFGS")

    # Check one more to be safe
    module = config.custom_import(name="optimize", module="base")
    assert (module.__name__ == "Base")
    assert (module.__module__ == "seisflows3.optimize.base")
Example #10
File: LBFGS.py Project: bch0w/seisflows
class LBFGS(custom_import("optimize", "base")):
    """
    The Limited memory BFGS algorithm
    Calls upon seisflows.plugin.optimize.LBFGS to run the L-BFGS algorithm

    Includes optional safeguards: periodic restarting and descent conditions.

    To conserve memory, most vectors are read from disk rather than passed
    from a calling routine.

    L-BFGS Variables:
        s: memory of model differences
        y: memory of gradient differences

    Optimization Variables:
        m: model
        f: objective function value
        g: gradient direction
        p: search direction

    Line Search Variables:
        x: list of step lengths from current line search
        f: corresponding list of function values
        m: number of step lengths in current line search
        n: number of model updates in optimization problem
        gtg: dot product of gradient with itself
        gtp: dot product of gradient and search direction

    Status codes
        status > 0  : finished
        status == 0 : not finished
        status < 0  : failed
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized as NoneTypes for clarity and docstrings.

        :type LBFGS: Class
        :param LBFGS: plugin LBFGS class that controls the machinery of the
            L-BFGS optimization schema
        :type LBFGS_iter: int
        :param LBFGS_iter: an internally used iteration that differs from
            optimization iter. Keeps track of internal LBFGS memory of previous
            gradients. If LBFGS is restarted, the LBFGS_iter iteration is reset,
            but the optimization iteration is not.
        :type memory_used: int
        :param memory_used: bookkeeping to see how many previous
            gradients have been stored to internal memory. Should not exceed
            PAR.LBFGSMEM
        :type LBFGS_dir: str
        :param LBFGS_dir: location to store LBFGS internal memory
        :type y_file: str
        :param y_file: path to store memory of the gradient differences
            i.e., `g_new - g_old`
        :type s_file: str
        :param s_file: path to store memory of the model differences
            i.e., `m_new - m_old`
        """
        super().__init__()
        self.LBFGS_iter = 0
        self.memory_used = 0
        self.LBFGS_dir = "LBFGS"
        self.y_file = os.path.join(self.LBFGS_dir, "Y")
        self.s_file = os.path.join(self.LBFGS_dir, "S")

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        # Define the Parameters required by this module
        sf.par("LINESEARCH", required=False, default="Backtrack", par_type=str,
               docstr="Algorithm to use for line search, see "
                      "seisflows.plugins.line_search for available choices")

        sf.par("LBFGSMEM", required=False, default=3, par_type=int,
               docstr="Max number of previous gradients to retain "
                      "in local memory")

        sf.par("LBFGSMAX", required=False, par_type=int, default="inf",
               docstr="LBFGS periodic restart interval, between 1 and 'inf'")

        sf.par("LBFGSTHRESH", required=False, default=0., par_type=float,
               docstr="LBFGS angle restart threshold")

        return sf

    def check(self, validate=True):
        """
        Checks parameters, paths, and dependencies
        """
        super().check(validate=False)
        if validate:
            self.required.validate()

        assert(PAR.LINESEARCH.upper() == "BACKTRACK"), \
            "LBFGS requires a Backtracking line search"

    def setup(self):
        """
        Set up the LBFGS optimization schema
        """
        super().setup()

        # Create a separate directory for LBFGS matters
        unix.cd(PATH.OPTIMIZE)
        unix.mkdir(self.LBFGS_dir)

    def compute_direction(self):
        """
        Call on the L-BFGS optimization machinery to compute a search
        direction using internally stored memory of previous gradients.
        The potential outcomes when computing direction with L-BFGS

        1. First iteration of L-BFGS optimization, search direction is defined
            as the inverse gradient
        2. L-BFGS internal iteration ticks over the maximum allowable number of
            iterations, force a restart condition, search direction is the
            inverse gradient
        3. New search direction vector is too far from previous direction,
            force a restart, search direction is inverse gradient
        4. New search direction is acceptably angled from previous,
            becomes the new search direction
        """
        self.logger.info(f"computing search direction with L-BFGS")
        self.LBFGS_iter += 1

        unix.cd(PATH.OPTIMIZE)

        # Load the current gradient direction, which is the L-BFGS search
        # direction if this is the first iteration
        g = self.load(self.g_new)
        if self.LBFGS_iter == 1:
            self.logger.info("first L-BFGS iteration, setting search direction "
                             "as inverse gradient")
            p_new = -g
            restarted = 0
        # Restart condition or first iteration lead to setting search direction
        # as the inverse gradient (i.e., default to steepest descent)
        elif self.LBFGS_iter > PAR.LBFGSMAX:
            self.logger.info("restarting L-BFGS due to periodic restart "
                             "condition. setting search direction as"
                             "inverse gradient")
            self.restart()
            p_new = -g
            restarted = 1
        # Normal LBFGS direction computation
        else:
            # Update the search direction, apply the inverse Hessian such that
            # 'q' becomes the new search direction 'g'
            self.logger.info("applying inverse Hessian to gradient")
            s, y = self.update()
            q = self.apply(g, s, y)

            # Determine if the new search direction is appropriate by checking
            # its angle to the previous search direction
            if self.check_status(g, q):
                self.logger.info("new L-BFGS search direction found")
                p_new = -q
                restarted = 0
            else:
                self.logger.info("new search direction not appropriate, "
                                 "defaulting to inverse gradient")
                self.restart()
                p_new = -g
                restarted = 1

        # Save values to disk and memory
        self.save(self.p_new, p_new)
        self.restarted = restarted

    def restart(self):
        """
        On top of base restart class, include a restart of the LBFGS internal
        memory and memmaps
        """
        super().restart()

        self.logger.info("restarting L-BFGS optimization algorithm by clearing "
                         "internal memory")
        self.LBFGS_iter = 1
        self.memory_used = 0

        unix.cd(PATH.OPTIMIZE)
        s = np.memmap(filename=self.s_file, mode="r+")
        y = np.memmap(filename=self.y_file, mode="r+")
        s[:] = 0.
        y[:] = 0.

    def update(self):
        """
        Updates L-BFGS algorithm history

        .. note::
            Because models are large, and multiple iterations of models need to
            be stored in memory, previous models are stored as `memmaps`,
            which allow for access of small segments of large files on disk,
            without reading the entire file. Memmaps are array like objects.

        .. note::
            Notation for s and y taken from Liu & Nocedal 1989
            iterate notation: s_k = x_{k+1} - x_k and y_k = g_{k+1} - g_k

        :rtype s: np.memmap
        :return s: memory of the model differences `m_new - m_old`
        :rtype y: np.memmap
        :return y: memory of the gradient differences `g_new - g_old`
        """
        unix.cd(PATH.OPTIMIZE)

        # Determine the iterates for model m and gradient g
        s_k = self.load(self.m_new) - self.load(self.m_old)
        y_k = self.load(self.g_new) - self.load(self.g_old)

        # Determine the shape of the memory map (length of model, length of mem)
        m = len(s_k)
        n = PAR.LBFGSMEM

        # Initial iteration, need to create the memory map
        if self.memory_used == 0:
            s = np.memmap(filename=self.s_file, mode="w+", dtype="float32",
                          shape=(m, n))
            y = np.memmap(filename=self.y_file, mode="w+", dtype="float32",
                          shape=(m, n))
            # Store the model and gradient differences in memmaps
            s[:, 0] = s_k
            y[:, 0] = y_k
            self.memory_used = 1
        # Subsequent iterations will append to memory maps
        else:
            s = np.memmap(filename=self.s_file, mode="r+", dtype="float32",
                          shape=(m, n))
            y = np.memmap(filename=self.y_file, mode="r+", dtype="float32",
                          shape=(m, n))
            # Shift all stored memory by one index to make room for latest mem
            s[:, 1:] = s[:, :-1]
            y[:, 1:] = y[:, :-1]
            # Store the latest model and gradient in first index
            s[:, 0] = s_k
            y[:, 0] = y_k

            # Keep track of the memory used
            if self.memory_used < PAR.LBFGSMEM:
                self.memory_used += 1

        return s, y

    def apply(self, q, s=None, y=None):
        """
        Applies L-BFGS inverse Hessian to given vector

        :type q: np.array
        :param q: gradient direction to apply L-BFGS to
        :type s: np.memmap
        :param s: memory of model differences
        :type y: np.memmap
        :param y: memory of gradient direction differences
        :rtype r: np.array
        :return r: new search direction from application of L-BFGS
        """
        unix.cd(PATH.OPTIMIZE)

        # If no memmaps are given as arguments, instantiate them
        if s is None or y is None:
            m = len(q)
            n = PAR.LBFGSMEM
            s = np.memmap(filename=self.s_file, mode="w+", dtype="float32",
                          shape=(m, n))
            y = np.memmap(filename=self.y_file, mode="w+", dtype="float32",
                          shape=(m, n))

        # First matrix product
        # Recursion step 2 from appendix A of Modrak & Tromp 2016
        kk = self.memory_used
        rh = np.zeros(kk)
        al = np.zeros(kk)
        for ii in range(kk):
            rh[ii] = 1 / np.dot(y[:, ii], s[:, ii])
            al[ii] = rh[ii] * np.dot(s[:, ii], q)
            q = q - al[ii] * y[:, ii]

        # Apply a preconditioner if available
        if self.precond:
            r = self.precond(q)
        else:
            r = q

        # Use scaling M3 proposed by Liu and Nocedal 1989
        sty = np.dot(y[:, 0], s[:, 0])
        yty = np.dot(y[:, 0], y[:, 0])
        r *= sty/yty

        # Second matrix product
        # Recursion step 4 from appendix A of Modrak & Tromp 2016
        for ii in range(kk - 1, -1, -1):
            be = rh[ii] * np.dot(y[:, ii], r)
            r = r + s[:, ii] * (al[ii] - be)

        return r

    def check_status(self, g, r):
        """
        Check the status of the apply() function, determine if restart necessary
        Return of False means restart, return of True means good to go.

        :type g: np.array
        :param g: current gradient direction
        :type r: np.array
        :param r: new gradient direction
        :rtype: bool
        :return: okay status based on status check (False==bad, True==good)
        """
        theta = 180. * np.pi ** -1 * angle(g, r)
        self.logger.info(f"new search direction: {theta:.2f}{DEG} from current")

        if not 0. < theta < 90.:
            self.logger.info("restarting L-BFGS, theta not a descent direction")
            okay = False
        elif theta > 90. - PAR.LBFGSTHRESH:
            self.logger.info("restarting L-BFGS due to practical safeguard")
            okay = False
        else:
            okay = True
        return okay
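The memmap bookkeeping used by update() can be tried in isolation. The sketch below uses made-up dimensions and values purely to illustrate the create/shift/insert pattern; it is not part of SeisFlows.

import os
import tempfile

import numpy as np

# Hypothetical sizes: model vector of length m, memory for n previous pairs
m, n = 10, 3
s_file = os.path.join(tempfile.mkdtemp(), "S")

# First iteration: create the memmap on disk and store the newest model
# difference vector in column 0
s = np.memmap(filename=s_file, mode="w+", dtype="float32", shape=(m, n))
s[:, 0] = np.arange(m, dtype="float32")
s.flush()

# Later iterations: reopen in read/write mode, shift the stored columns over
# by one to make room, then place the newest difference vector in column 0
s = np.memmap(filename=s_file, mode="r+", dtype="float32", shape=(m, n))
s[:, 1:] = s[:, :-1]
s[:, 0] = np.ones(m, dtype="float32")
s.flush()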
Example #11
class Specfem3D(custom_import("solver", "base")):
    """
    Python interface to Specfem3D Cartesian. This subclass inherits functions
    from seisflows3.solver.Base

    !!! See base class for method descriptions !!!
    """
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized as NoneTypes for clarity and docstrings.

        :type logger: Logger
        :param logger: Class-specific logging module, log statements pushed
            from this logger will be tagged by its specific module/classname
        """
        super().__init__()

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        # Define the Parameters required by this module
        sf.par("NT",
               required=True,
               par_type=float,
               docstr="Number of time steps set in the SPECFEM Par_file")

        sf.par("DT",
               required=True,
               par_type=float,
               docstr="Time step or delta set in the SPECFEM Par_file")

        sf.par("FORMAT",
               required=True,
               par_type=float,
               docstr="Format of synthetic waveforms used during workflow, "
               "available options: ['ascii', 'su']")

        sf.par("SOURCE_PREFIX",
               required=False,
               default="CMTSOLUTION",
               par_type=str,
               docstr="Prefix of SOURCE files in path SPECFEM_DATA. Available "
               "['CMTSOLUTION', FORCESOLUTION']")

        return sf

    def check(self, validate=True):
        """
        Checks parameters and paths
        """
        if validate:
            self.required.validate()
        super().check(validate=False)

        acceptable_formats = ["SU", "ASCII"]
        if PAR.FORMAT.upper() not in acceptable_formats:
            raise Exception(f"'FORMAT' must be {acceptable_formats}")

    def generate_data(self, **model_kwargs):
        """
        Generates data using the True model, exports traces to `traces/obs`

        :param model_kwargs: keyword arguments to pass to `generate_mesh`
        """
        # Create the mesh
        self.generate_mesh(**model_kwargs)

        # Run the Forward simulation
        unix.cd(self.cwd)
        setpar(key="SIMULATION_TYPE", val="1", file="DATA/Par_file")
        setpar(key="SAVE_FORWARD", val=".true.", file="DATA/Par_file")
        if PAR.ATTENUATION:
            setpar(key="ATTENUATION", val=".true.", file="DATA/Par_file")
        else:
            setpar(key="ATTENUATION", val=".false.", file="DATA/Par_file")

        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xspecfem3D")

        unix.mv(src=glob(os.path.join("OUTPUT_FILES", self.data_wildcard)),
                dst=os.path.join("traces", "obs"))

        # Export traces to disk for permanent storage
        if PAR.SAVETRACES:
            self.export_traces(os.path.join(PATH.OUTPUT, "traces", "obs"))

    def generate_mesh(self, model_path, model_name, model_type=None):
        """
        Performs meshing with internal mesher Meshfem3D and database generation

        :type model_path: str
        :param model_path: path to the model to be used for mesh generation
        :type model_name: str
        :param model_name: name of the model to be used as identification
        :type model_type: str
        :param model_type: available model types to be passed to the Specfem3D
            Par_file. See Specfem3D Par_file for available options.
        """
        available_model_types = ["gll"]

        assert (exists(model_path)), f"model {model_path} does not exist"

        model_type = model_type or getpar(key="MODEL", file="DATA/Par_file")
        assert(model_type in available_model_types), \
            f"{model_type} not in available types {available_model_types}"

        unix.cd(self.cwd)

        # Run mesh generation
        if model_type == "gll":
            self.check_mesh_properties(model_path)

            src = glob(os.path.join(model_path, "*"))
            dst = self.model_databases
            unix.cp(src, dst)

            call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xmeshfem3D")
            call_solver(mpiexec=PAR.MPIEXEC,
                        executable="bin/xgenerate_databases")

        # Export the model for future use in the workflow
        if self.taskid == 0:
            self.export_model(os.path.join(PATH.OUTPUT, model_name))

    def eval_func(self, *args, **kwargs):
        """
        Call eval_func from Base class
        """
        super().eval_func(*args, **kwargs)

        # Work around SPECFEM3D conflicting name conventions of SU data
        self.rename_data()

    def forward(self, path="traces/syn"):
        """
        Calls SPECFEM3D forward solver, exports solver outputs to traces dir

        :type path: str
        :param path: path to export traces to after completion of simulation
        """
        # Set parameters and run forward simulation
        setpar(key="SIMULATION_TYPE", val="1", file="DATA/Par_file")
        setpar(key="SAVE_FORWARD", val=".true.", file="DATA/Par_file")
        if PAR.ATTENUATION:
            setpar(key="ATTENUATION", val=".true.", file="DATA/Par_file")
        else:
            setpar(key="ATTENUATION", val=".false`.", file="DATA/Par_file")

        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xgenerate_databases")
        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xspecfem3D")

        # Find and move output traces, by default to synthetic traces dir
        unix.mv(src=glob(os.path.join("OUTPUT_FILES", self.data_wildcard)),
                dst=path)

    def adjoint(self):
        """
        Calls SPECFEM3D adjoint solver, creates the `SEM` folder with adjoint
        traces which is required by the adjoint solver
        """
        setpar(key="SIMULATION_TYPE", val="3", file="DATA/Par_file")
        setpar(key="SAVE_FORWARD", val=".false.", file="DATA/Par_file")
        setpar(key="ATTENUATION", val=".false.", file="DATA/Par_file")

        unix.rm("SEM")
        unix.ln("traces/adj", "SEM")

        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xspecfem3D")

    def check_solver_parameter_files(self):
        """
        Checks solver parameters 
        """
        # Check the number of steps in the SPECFEM2D Par_file
        nt_str, nt, nt_i = getpar(key="NSTEP", file="DATA/Par_file")
        if int(nt) != PAR.NT:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM3D {nt_str}=={nt} is not equal "
                        f"SeisFlows3 PAR.NT=={PAR.NT}. Please ensure "
                        f"that these values match in both files.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        dt_str, dt, dt_i = getpar(key="DT", file="DATA/Par_file")
        if float(dt) != PAR.DT:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM3D {dt_str}=={dt} is not equal "
                        f"SeisFlows3 PAR.DT=={PAR.DT}. Please ensure "
                        f"that these values match in both files.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        # Ensure that NPROC matches the MESH values
        nproc = self.mesh_properties.nproc
        if nproc != PAR.NPROC:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM3D mesh NPROC=={nproc} is not equal "
                        f"SeisFlows3 PAR.NPROC=={PAR.NPROC}. "
                        f"Please check that your mesh matches this val.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        if "MULTIPLES" in PAR:
            raise NotImplementedError

    def initialize_adjoint_traces(self):
        """
        Setup utility: Creates the "adjoint traces" expected by SPECFEM

        Note:
            Adjoint traces are initialized by writing zeros for all channels.
            Channels actually in use during an inversion or migration will be
            overwritten with nonzero values later on.
        """
        # Initialize adjoint traces as zeroes for all data_filenames
        # write to `traces/adj`
        super().initialize_adjoint_traces()

        # Rename data to work around Specfem naming conventions
        self.rename_data()

        # Workaround for Specfem3D's requirement that all components exist,
        # even ones not in use as adjoint traces
        if PAR.FORMAT.upper() == "SU":
            unix.cd(os.path.join(self.cwd, "traces", "adj"))

            for iproc in range(PAR.NPROC):
                for channel in ["x", "y", "z"]:
                    dst = f"{iproc:d}_d{channel}_SU.adj"
                    if not exists(dst):
                        src = f"{iproc:d}_d{PAR.COMPONENTS[0]}_SU.adj"
                        unix.cp(src, dst)

    def rename_data(self):
        """
        Works around conflicting data filename conventions

        Specfem3D uses different naming conventions for regular traces
        and 'adjoint' traces
        """
        if PAR.FORMAT.upper() == "SU":
            files = glob(os.path.join(self.cwd, "traces", "adj", "*SU"))
            unix.rename(old='_SU', new='_SU.adj', names=files)

    def write_parameters(self):
        """
        Write a set of parameters

        !!! This calls on plugins.solver.specfem3d.write_parameters()
            but that function doesn't exist !!!
        """
        unix.cd(self.cwd)
        solvertools.write_parameters(vars(PAR))

    def write_receivers(self):
        """
        Write a list of receivers into a text file

        !!! This calls on plugins.solver.specfem3d.write_receivers()
            but incorrect number of parameters is forwarded !!!
        """
        unix.cd(self.cwd)
        setpar(key="use_existing_STATIONS", val=".true", file="DATA/Par_file")

        _, h = preprocess.load("traces/obs")
        solvertools.write_receivers(h.nr, h.rx, h.rz)

    def write_sources(self):
        """
        Write sources to text file
        """
        unix.cd(self.cwd)
        _, h = preprocess.load(dir="traces/obs")
        solvertools.write_sources(PAR=vars(PAR), h=h)

    @property
    def data_wildcard(self):
        """
        Returns a wildcard identifier for synthetic data

        :rtype: str
        :return: wildcard identifier for channels
        """
        if PAR.FORMAT.upper() == "SU":
            return f"*_d?_SU"
        elif PAR.FORMAT.upper() == "ASCII":
            return f"*.?X?.sem?"

    @property
    def data_filenames(self):
        """
        Returns the filenames of all data, either by the requested components
        or by all available files in the directory.

        :rtype: list
        :return: list of data filenames
        """
        unix.cd(os.path.join(self.cwd, "traces", "obs"))

        if PAR.COMPONENTS:
            components = PAR.COMPONENTS

            if PAR.FORMAT.upper() == "SU":
                return sorted(glob(f"*_d[{components.lower()}]_SU"))
            elif PAR.FORMAT.upper() == "ASCII":
                return sorted(glob(f"*.?X[{components.upper()}].sem?"))
        else:
            if PAR.FORMAT.upper() == "SU":
                return sorted(glob("*_d?_SU"))
            elif PAR.FORMAT.upper() == "ASCII":
                return sorted(glob("*.???.sem?"))

    @property
    def kernel_databases(self):
        """
        The location of databases for kernel outputs, relative to the current
        working directory. 
        """
        return os.path.join(self.cwd, "OUTPUT_FILES", "DATABASES_MPI")

    @property
    def model_databases(self):
        """
        The location of databases for model outputs
        """
        return os.path.join(self.cwd, "OUTPUT_FILES", "DATABASES_MPI")

    @property
    def source_prefix(self):
        """
        Specfem3D's preferred source prefix

        :rtype: str
        :return: source prefix
        """
        return PAR.SOURCE_PREFIX.upper()
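The wildcards returned by data_wildcard above can be sanity checked against example filenames with fnmatch, which applies the same ? and * rules as glob. The filenames below are made up for illustration only.

from fnmatch import fnmatch

# ASCII-format synthetic, e.g., network.station.channel.sem<unit>
print(fnmatch("NZ.BFZ.BXZ.semd", "*.?X?.sem?"))     # True
print(fnmatch("NZ.BFZ.BXZ.semd", "*.?X[ZN].sem?"))  # True, component-restricted
# SU-format synthetic, one file per processor and component
print(fnmatch("0_dz_SU", "*_d?_SU"))                # True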
Example #12
class Specfem3DMaui(custom_import("solver", "specfem3d")):
    """
    Python interface to Specfem3D Cartesian. This subclass inherits functions
    from seisflows3.solver.specfem3d

    !!! See base class for method descriptions !!!
    """
    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        return sf

    def check(self, validate=True):
        """
        Checks parameters and paths
        """
        if validate:
            self.required.validate()
        super().check(validate=False)

    def setup(self, model):
        """
        Overload of solver.base.setup(), should be run as a single instance

        :type model: str
        :param model: model to set up, either 'init' or 'true'; determines
            which mesh is generated for the workflow. 'true' is used for
            synthetic-synthetic cases, 'init' is the initial model (default)
        """
        # Choice of model will determine which mesh to generate
        self.generate_mesh(model_path=getattr(PATH, f"MODEL_{model.upper()}"),
                           model_name=f"model_{model.lower()}",
                           model_type="gll")

        self.distribute_databases()

    def generate_data(self):
        """
        Overload seisflows.solver.base.generate_data. To be run in parallel
        
        Not used if PAR.CASE == "Data"

        Generates data in the synthetic-synthetic comparison case.
        Automatically calls generate_mesh for the true model, rather than
        passing the arguments in as kwargs.

        Also turns on attenuation for the forward model
        !!! attenuation could be moved into parameters.yaml? !!!
        """
        unix.cd(self.cwd)

        setpar(key="SIMULATION_TYPE", val="1", file="DATA/Par_file")
        setpar(key="SAVE_FORWARD", val=".true.", file="DATA/Par_file")
        if PAR.ATTENUATION:
            setpar(key="ATTENUATION ", val=".true.", file="DATA/Par_file")
        else:
            setpar(key="ATTENUATION ", val=".false.", file="DATA/Par_file")

        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xspecfem3D")

        # move ASCII .sem? files into appropriate directory
        unix.mv(src=glob(os.path.join("OUTPUT_FILES", self.data_wildcard)),
                dst=os.path.join("traces", "obs"))

        # Export traces to permanent storage on disk
        if PAR.SAVETRACES:
            self.export_traces(os.path.join(PATH.OUTPUT, "traces", "obs"))

    def generate_mesh(self, model_path, model_name, model_type='gll'):
        """
        Performs meshing and database generation as a serial task. Differs
        slightly from specfem3d class as it only creates database files for
        the main solver, which are then copied in serial by the function
        distribute_databases()

        :type model_path: str
        :param model_path: path to the model to be used for mesh generation
        :type model_name: str
        :param model_name: name of the model to be used as identification
        :type model_type: str
        :param model_type: available model types to be passed to the Specfem3D
            Par_file. See Specfem3D Par_file for available options.
        """
        available_model_types = ["gll"]

        assert (exists(model_path)), f"model {model_path} does not exist"

        model_type = model_type or getpar(key="MODEL", file="DATA/Par_file")
        assert(model_type in available_model_types), \
            f"{model_type} not in available types {available_model_types}"

        # Ensure that we're running on the main solver only
        assert (self.taskid == 0)

        unix.cd(self.cwd)

        # Check that the model parameter falls into the acceptable types
        par = getpar("MODEL").strip()
        assert(par in available_model_types), \
            f"Par_file {par} not in available types {available_model_types}"

        if par == "gll":
            self.check_mesh_properties(model_path)

            # Copy model files and then run xgenerate databases
            src = glob(os.path.join(model_path, "*"))
            dst = self.model_databases
            unix.cp(src, dst)

            call_solver(mpiexec=PAR.MPIEXEC,
                        executable="bin/xgenerate_databases")

        self.export_model(os.path.join(PATH.OUTPUT, model_name))

    def eval_misfit(self, path='', export_traces=False):
        """
        Performs function evaluation only, that is, the misfit quantification.
        Forward simulations are performed in a separate function

        :type path: str
        :param path: path in the scratch directory to use for I/O
        :type export_traces: bool
        :param export_traces: option to save the observation traces to disk
        :return:
        """
        preprocess.prepare_eval_grad(cwd=self.cwd,
                                     taskid=self.taskid,
                                     source_name=self.source_name,
                                     filenames=self.data_filenames)
        if export_traces:
            self.export_residuals(path)

    def eval_fwd(self, path=''):
        """
        High level solver interface

        Performs forward simulations only; function evaluation is split off
        into its own function

        :type path: str
        :param path: path in the scratch directory to use for I/O
        """
        unix.cd(self.cwd)
        self.import_model(path)
        self.forward()

    def distribute_databases(self):
        """
        A serial task to distribute the database files output by
        xgenerate_databases from the main solver to all other solver directories
        """
        # Copy the database files but ignore any vt? files
        src_db = glob(
            os.path.join(PATH.SOLVER, self.mainsolver, "OUTPUT_FILES",
                         "DATABASES_MPI", "*"))
        for extension in [".vtu", ".vtk"]:
            src_db = [_ for _ in src_db if extension not in _]

        # Copy the .h files from the mesher, Specfem needs these as well
        src_h = glob(
            os.path.join(PATH.SOLVER, self.mainsolver, "OUTPUT_FILES", "*.h"))

        for source_name in self.source_names:
            # Ensure main solver is skipped
            if source_name == self.mainsolver:
                continue
            # Copy database files to each of the other source directories
            dst_db = os.path.join(PATH.SOLVER, source_name, "OUTPUT_FILES",
                                  "DATABASES_MPI", "")
            unix.cp(src_db, dst_db)

            # Copy mesher h files into the overlying directory
            dst_h = os.path.join(PATH.SOLVER, source_name, "OUTPUT_FILES", "")
            unix.cp(src_h, dst_h)

    def initialize_solver_directories(self):
        """
        Creates solver directories in serial using a single node.
        Should only be run by master job.

        Differs from Base initialize_solver_directories() as this serial task
        will create directory structures for each source, rather than having
        each source create its own. However the internal dir structure is the
        same.
        """
        for source_name in self.source_names:
            cwd = os.path.join(PATH.SOLVER, source_name)
            # Remove any existing scratch directory
            unix.rm(cwd)

            # Create internal directory structure, change into directory to make
            # all actions RELATIVE path actions
            unix.mkdir(cwd)
            unix.cd(cwd)
            for cwd_dir in [
                    "bin", "DATA", "OUTPUT_FILES/DATABASES_MPI", "traces/obs",
                    "traces/syn", "traces/adj"
            ]:
                unix.mkdir(cwd_dir)

            # Copy executables
            src = glob(os.path.join(PATH.SPECFEM_BIN, "*"))
            dst = os.path.join("bin", "")
            unix.cp(src, dst)

            # Copy all input files except source files
            src = glob(os.path.join(PATH.SPECFEM_DATA, "*"))
            src = [_ for _ in src if self.source_prefix not in _]
            dst = os.path.join("DATA", "")
            unix.cp(src, dst)

            # symlink event source specifically
            src = os.path.join(PATH.SPECFEM_DATA,
                               f"{self.source_prefix}_{source_name}")
            dst = os.path.join("DATA", self.source_prefix)
            unix.ln(src, dst)

            if source_name == self.mainsolver:
                # Symlink taskid_0 as mainsolver in solver directory
                unix.ln(source_name, os.path.join(PATH.SOLVER, "mainsolver"))
                # Only check the solver parameters once
                self.check_solver_parameter_files()

    def check_solver_parameter_files(self):
        """
        Checks solver parameters. Only slightly different from Specfem3D as it
        is run by the main task, not by an array process, so no need to check
        task_id
        """
        nt = getpar(key="NSTEP", cast=int)
        dt = getpar(key="DT", cast=float)

        if nt != PAR.NT:
            warnings.warn("Specfem3D NSTEP != PAR.NT\n"
                          "overwriting Specfem3D with Seisflows parameter")
            setpar(key="NSTEP", val=PAR.NT)

        if dt != PAR.DT:
            warnings.warn("Specfem3D DT != PAR.DT\n"
                          "overwriting Specfem3D with Seisflows parameter")
            setpar(key="DT", val=PAR.DT)

        if self.mesh_properties.nproc != PAR.NPROC:
            warnings.warn("Specfem3D mesh nproc != PAR.NPROC")

        if "MULTIPLES" in PAR:
            raise NotImplementedError

    @property
    def mainsolver(self):
        """
        Ensure that the main solver has a consistent reference inside Solver
        """
        return self.source_names[0]
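The filtering step in distribute_databases() can be illustrated with a plain list: visualization files (.vtu/.vtk) are dropped before the remaining database files are copied to each source directory. The filenames below are made up.

# Toy illustration of the extension filtering used in distribute_databases()
src_db = ["proc000000_external_mesh.bin", "mesh.vtk", "free_surface.vtu"]
for extension in [".vtu", ".vtk"]:
    src_db = [_ for _ in src_db if extension not in _]
print(src_db)  # ['proc000000_external_mesh.bin']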
Example #13
class Specfem2D(custom_import("solver", "base")):
    """
    Python interface to Specfem2D. This subclass inherits functions from
    seisflows.solver.Base

    !!! See base class for method descriptions !!!
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized as NoneTypes for clarity and docstrings.

        :type logger: Logger
        :param logger: Class-specific logging module, log statements pushed
            from this logger will be tagged by its specific module/classname
        """
        super().__init__()

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        # Define the Parameters required by this module
        sf.par("NT",
               required=True,
               par_type=float,
               docstr="Number of time steps set in the SPECFEM Par_file")

        sf.par("DT",
               required=True,
               par_type=float,
               docstr="Time step or delta set in the SPECFEM Par_file")

        sf.par("F0",
               required=True,
               par_type=float,
               docstr="Dominant source frequency")

        sf.par("FORMAT",
               required=True,
               par_type=float,
               docstr="Format of synthetic waveforms used during workflow, "
               "available options: ['ascii', 'su']")

        sf.par("SOURCE_PREFIX",
               required=False,
               default="SOURCE",
               par_type=str,
               docstr="Prefix of SOURCE files in path SPECFEM_DATA. By "
               "default, 'SOURCE' for SPECFEM2D")

        return sf

    def check(self, validate=True):
        """
        Checks parameters and paths
        """
        if validate:
            self.required.validate()

        super().check(validate=False)

        acceptable_formats = ["SU", "ASCII"]
        assert(PAR.FORMAT.upper() in acceptable_formats), \
            f"FORMAT must be {acceptable_formats}"

    def check_solver_parameter_files(self):
        """
        Checks SPECFEM2D Par_file for acceptable parameters and matches with
        the internally set parameters
        """
        # Check the number of steps in the SPECFEM2D Par_file
        nt_str, nt, nt_i = getpar(key="NSTEP", file="DATA/Par_file")
        if int(nt) != PAR.NT:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM2D {nt_str}=={nt} is not equal "
                        f"SeisFlows3 PAR.NT=={PAR.NT}. Please ensure "
                        f"that these values match in both files.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        dt_str, dt, dt_i = getpar(key="DT", file="DATA/Par_file")
        if float(dt) != PAR.DT:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM2D {dt_str}=={dt} is not equal "
                        f"SeisFlows3 PAR.DT=={PAR.DT}. Please ensure "
                        f"that these values match in both files.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        # Check the central frequency in the SPECFEM2D SOURCE file
        f0_str, f0, f0_i = getpar(key="f0", file="DATA/SOURCE")
        if float(f0) != PAR.F0:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM2D {f0_str}=={f0} is not equal "
                        f"SeisFlows3 PAR.F0=={PAR.F0}. Please ensure "
                        f"that these values match the DATA/SOURCE file.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        # Ensure that NPROC matches the MESH values
        nproc = self.mesh_properties.nproc
        if nproc != PAR.NPROC:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM2D mesh NPROC=={nproc} is not equal"
                        f"SeisFlows3 PAR.NPROC=={PAR.NPROC}. "
                        f"Please check that your mesh matches this val.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        if "MULTIPLES" in PAR:
            if PAR.MULTIPLES:
                setpar(key="absorbtop", val=".false.", file="DATA/Par_file")
            else:
                setpar(key="absorbtop", val=".true.", file="DATA/Par_file")

    def generate_data(self, **model_kwargs):
        """
        Generates data using the True model, exports traces to `traces/obs`

        :param model_kwargs: keyword arguments to pass to `generate_mesh`
        """
        self.generate_mesh(**model_kwargs)

        unix.cd(self.cwd)
        setpar(key="SIMULATION_TYPE", val="1", file="DATA/Par_file")
        setpar(key="SAVE_FORWARD", val=".true.", file="DATA/Par_file")

        call_solver(PAR.MPIEXEC, "bin/xmeshfem2D", output="mesher.log")
        call_solver(PAR.MPIEXEC, "bin/xspecfem2D", output="solver.log")

        if PAR.FORMAT.upper() == "SU":
            # Work around SPECFEM2D's version dependent file names
            for tag in ["d", "v", "a", "p"]:
                unix.rename(old=f"single_{tag}.su",
                            new="single.su",
                            names=glob(os.path.join("OUTPUT_FILES", "*.su")))

        unix.mv(src=glob(os.path.join("OUTPUT_FILES", self.data_wildcard)),
                dst=os.path.join("traces", "obs"))

        if PAR.SAVETRACES:
            self.export_traces(os.path.join(PATH.OUTPUT, "traces", "obs"))

    def initialize_adjoint_traces(self):
        """
        Setup utility: Creates the "adjoint traces" expected by SPECFEM.
        This is only done by the 'base' Preprocess class.

        Note:
            Adjoint traces are initialized by writing zeros for all channels.
            Channels actually in use during an inversion or migration will be
            overwritten with nonzero values later on.
        """
        super().initialize_adjoint_traces()

        unix.cd(self.cwd)
        unix.cd(os.path.join("traces", "adj"))

        # work around SPECFEM2D's use of different name conventions for
        # regular traces and 'adjoint' traces
        if PAR.FORMAT.upper() == "SU":
            files = glob("*SU")
            unix.rename(old="_SU", new="_SU.adj", names=files)
        elif PAR.FORMAT.upper() == "ASCII":
            files = glob("*sem?")

            # Get the available extensions, which are named based on unit
            extensions = set([os.path.splitext(_)[-1] for _ in files])
            for extension in extensions:
                unix.rename(old=extension, new=".adj", names=files)

        # SPECFEM2D requires that all components exist even if unused
        components = ["x", "y", "z", "p"]

        if PAR.FORMAT.upper() == "SU":
            for comp in components:
                src = f"U{PAR.COMPONENTS[0]}_file_single.su.adj"
                dst = f"U{comp.lower()}s_file_single.su.adj"
                if not exists(dst):
                    unix.cp(src, dst)
        elif PAR.FORMAT.upper() == "ASCII":
            for fid in glob("*.adj"):
                net, sta, cha, ext = fid.split(".")
                for comp in components:
                    # Replace the last value in the channel with new component
                    cha_check = cha[:-1] + comp.upper()
                    fid_check = ".".join([net, sta, cha_check, ext])
                    if not exists(fid_check):
                        unix.cp(fid, fid_check)

    def generate_mesh(self, model_path, model_name, model_type='gll'):
        """
        Performs meshing with internal mesher Meshfem2D and database generation

        :type model_path: str
        :param model_path: path to the model to be used for mesh generation
        :type model_name: str
        :param model_name: name of the model to be used as identification
        :type model_type: str
        :param model_type: available model types to be passed to the Specfem3D
            Par_file. See Specfem3D Par_file for available options.
        """
        assert (exists(model_path)), f"model {model_path} does not exist"

        available_model_types = ["gll"]
        assert(model_type in available_model_types), \
            f"{model_type} not in available types {available_model_types}"

        unix.cd(self.cwd)

        # Run mesh generation
        if model_type == "gll":
            self.check_mesh_properties(model_path)

            # Copy the model files (ex: proc000023_vp.bin ...) into DATA
            src = glob(os.path.join(model_path, "*"))
            dst = self.model_databases
            unix.cp(src, dst)

        # Export the model into output folder
        if self.taskid == 0:
            self.export_model(os.path.join(PATH.OUTPUT, model_name))

    def forward(self, path='traces/syn'):
        """
        Calls SPECFEM2D forward solver, exports solver outputs to traces dir

        :type path: str
        :param path: path to export traces to after completion of simulation
        """
        setpar(key="SIMULATION_TYPE", val="1", file="DATA/Par_file")
        setpar(key="SAVE_FORWARD", val=".true.", file="DATA/Par_file")

        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xmeshfem2D")
        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xspecfem2D")

        if PAR.FORMAT.upper() == "SU":
            # Work around SPECFEM2D's version dependent file names
            for tag in ["d", "v", "a", "p"]:
                unix.rename(old=f"single_{tag}.su",
                            new="single.su",
                            names=glob(os.path.join("OUTPUT_FILES", "*.su")))

        unix.mv(src=glob(os.path.join("OUTPUT_FILES", self.data_wildcard)),
                dst=path)

    def adjoint(self):
        """
        Calls SPECFEM2D adjoint solver, creates the `SEM` folder with adjoint
        traces which is required by the adjoint solver
        """
        setpar(key="SIMULATION_TYPE", val="3", file="DATA/Par_file")
        setpar(key="SAVE_FORWARD", val=".false.", file="DATA/Par_file")

        unix.rm("SEM")
        unix.ln("traces/adj", "SEM")

        # Deal with different SPECFEM2D name conventions for regular traces and
        # "adjoint" traces
        if PAR.FORMAT.upper() == "SU":
            unix.rename(old=".su",
                        new=".su.adj",
                        names=glob(os.path.join("traces", "adj", "*.su")))

        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xmeshfem2D")
        call_solver(mpiexec=PAR.MPIEXEC, executable="bin/xspecfem2D")

    def smooth(self, input_path, **kwargs):
        """
        Specfem2D requires additional model parameters in directory to perform
        the xsmooth_sem task. This function will copy these files into the 
        directory before performing the base smooth operations. 

        Kwargs should match arguments of solver.base.smooth()
        
        .. note::
            This operation is usually run with run(single=True) so only one
            task will be performing these operations.

        :type input_path: str
        :param input_path: path to data
        """
        # Redundant to 'base' class but necessary
        if not exists(input_path):
            unix.mkdir(input_path)

        unix.cd(self.cwd)
        unix.cd("DATA")

        # Copy over only the files that are required. Won't execute if no match
        files = []
        for tag in ["jacobian", "NSPEC_ibool", "x", "y", "z"]:
            files += glob(f"*_{tag}.bin")
        for src in files:
            unix.cp(src=src, dst=input_path)

        super().smooth(input_path=input_path, **kwargs)

    def import_model(self, path):
        """
        File transfer utility to move a SPECFEM2D model into the correct location
        for a workflow.

        :type path: str
        :param path: path to the SPECFEM2D model
        :return:
        """
        unix.cp(src=glob(os.path.join(path, "model", "*")),
                dst=os.path.join(self.cwd, "DATA"))

    def export_model(self, path):
        """
        File transfer utility to move a SPECFEM2D model from the DATA directory
        to an external path location

        :type path: str
        :param path: path to export the SPECFEM2D model
        :return:
        """
        unix.mkdir(path)
        unix.cp(src=glob(os.path.join(self.cwd, "DATA", "*.bin")), dst=path)

    @property
    def data_filenames(self):
        """
        Returns the filenames of all data, either by the requested components
        or by all available files in the directory.

        .. note:: 
            If the glob returns an empty list, this function exits the
            workflow because filenames should not be empty if they're being
            queried

        :rtype: list
        :return: list of data filenames
        """
        unix.cd(self.cwd)
        unix.cd(os.path.join("traces", "obs"))

        if PAR.COMPONENTS:
            filenames = []
            if PAR.FORMAT.upper() == "SU":
                for comp in PAR.COMPONENTS:
                    filenames += [self.data_wildcard.format(comp=comp.lower())]
                    # filenames += [f"U{comp.lower()}_file_single.su"]
            elif PAR.FORMAT.upper() == "ASCII":
                for comp in PAR.COMPONENTS:
                    filenames += glob(
                        self.data_wildcard.format(comp=comp.upper()))
                    # filenames += glob(f"*.?X{comp.upper()}.sem?")
        else:
            filenames = glob(self.data_wildcard)

        if not filenames:
            print(
                msg.cli(
                    "The property solver.data_filenames, used to search "
                    "for traces in 'scratch/solver/*/traces' is empty "
                    "and should not be. Please check solver parameters: ",
                    items=[f"data_wildcard: {self.data_wildcard}"],
                    header="data filenames error",
                    border="="))
            sys.exit(-1)

        return filenames

    @property
    def model_databases(self):
        """
        The location of model inputs and outputs as defined by SPECFEM2D
        """
        return os.path.join(self.cwd, "DATA")

    @property
    def kernel_databases(self):
        """
        The location of kernel inputs and outputs as defined by SPECFEM2D
        """
        return os.path.join(self.cwd, "OUTPUT_FILES")

    @property
    def data_wildcard(self, comp="?"):
        """
        Returns a wildcard identifier for synthetic data based on SPECFEM2D
        file naming schema. Allows formatting of the component, e.g.,
        when called by solver.data_filenames

        :type comp: str
        :param comp: component formatter, defaults to wildcard '?'
        :rtype: str
        :return: wildcard identifier for channels
        """
        if PAR.FORMAT.upper() == "SU":
            # return f"*.su"  # too vague but maybe for a reason? -bryant
            return f"U{comp}_file_single.su"
        elif PAR.FORMAT.upper() == "ASCII":
            return f"*.?X{comp}.sem?"

    @property
    def source_prefix(self):
        """
        Specfem2D's preferred source prefix

        :rtype: str
        :return: source prefix
        """
        return PAR.SOURCE_PREFIX.upper()
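
The data_wildcard/data_filenames pair above simply expands the component codes
requested in PAR.COMPONENTS into SPECFEM2D trace-file name patterns. A minimal
standalone sketch of that expansion (the helper name and example values are
illustrative only, not part of SeisFlows3):

def expand_wildcards(fmt, components=None):
    """Illustrative only: build per-component search patterns the way
    data_filenames does, for 'SU' or 'ASCII' trace formats."""
    if fmt.upper() == "SU":
        template = "U{comp}_file_single.su"
    else:  # ASCII
        template = "*.?X{comp}.sem?"
    if components:
        return [template.format(comp=c.lower() if fmt.upper() == "SU"
                                else c.upper())
                for c in components]
    return [template.format(comp="?")]

# e.g., expand_wildcards("ASCII", ["Z", "N", "E"]) returns
# ['*.?XZ.sem?', '*.?XN.sem?', '*.?XE.sem?'], which would then be passed to
# glob() inside the 'traces/obs' directory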
Example #14
class Inversion(custom_import("workflow", "base")):
    """
    Waveform inversion base class

    Performs iterative nonlinear inversion and provides a base class on top
    of which specialized strategies can be implemented.

    To allow customization, the inversion workflow is divided into generic
    methods such as "initialize", "finalize", "evaluate_function",
    "evaluate_gradient", which can be easily overloaded.

    Calls to forward and adjoint solvers are abstracted through the "solver"
    interface so that various forward modeling packages can be used
    interchangeably.

    Commands for running in serial or parallel on a workstation or cluster
    are abstracted through the "system" interface.
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized as NoneTypes for clarity and docstrings.
        """
        super().__init__()

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        # Define the Parameters required by this module
        sf.par("BEGIN",
               required=False,
               default=1,
               par_type=int,
               docstr="First iteration of workflow, 1 <= BEGIN <= inf")

        sf.par("END",
               required=True,
               par_type=int,
               docstr="Last iteration of workflow, BEGIN <= END <= inf")

        # Define the Paths required by this module
        sf.path("FUNC",
                required=False,
                default=os.path.join(PATH.SCRATCH, "evalfunc"),
                docstr="scratch path to store data related to function "
                "evaluations")

        sf.path("GRAD",
                required=False,
                default=os.path.join(PATH.SCRATCH, "evalgrad"),
                docstr="scratch path to store data related to gradient "
                "evaluations")

        sf.path("HESS",
                required=False,
                default=os.path.join(PATH.SCRATCH, "evalhess"),
                docstr="scratch path to store data related to Hessian "
                "evaluations")

        sf.path("OPTIMIZE",
                required=False,
                default=os.path.join(PATH.SCRATCH, "optimize"),
                docstr="scratch path to store data related to nonlinear "
                "optimization")

        return sf

    def check(self, validate=True):
        """
        Checks parameters and paths
        """
        super().check(validate=False)
        if validate:
            self.required.validate()

        for required_path in ["SCRATCH", "OUTPUT", "LOCAL"]:
            assert(required_path in PATH), \
                f"Inversion requires path {required_path}"

        assert(1 <= PAR.BEGIN <= PAR.END), \
            f"Incorrect BEGIN or END parameter: 1 <= {PAR.BEGIN} <= {PAR.END}"

    def main(self, return_flow=False):
        """
        This function controls the main SeisFlows3 workflow, and is submitted
        to system by the call `seisflows submit` or `seisflows resume`. It
        proceeds to evaluate a list of functions in order until a user-defined
        stop criterion is met.

        :type return_flow: bool
        :param return_flow: for CLI tool, simply returns the flow function
            rather than running the workflow. Used for print statements etc.
        """
        # The workflow is a tuple of functions that can be called dynamically
        flow = (self.setup, self.initialize, self.evaluate_gradient,
                self.write_gradient, self.compute_direction, self.line_search,
                self.finalize, self.clean)
        if return_flow:
            return flow

        # Allow workflow resume from and stop after given flow functions
        start, stop = self.check_stop_resume_cond(flow)

        # Run the workflow from the current iteration until PAR.END
        optimize.iter = PAR.BEGIN
        self.logger.info(msg.mjr("STARTING INVERSION WORKFLOW"))
        while True:
            self.logger.info(msg.mnr(f"ITERATION {optimize.iter} / {PAR.END}"))

            # Execute the functions within the flow
            for func in flow[start:stop]:
                func()

            # Finish. Assuming completion of all arguments in flow()
            self.logger.info(msg.mjr(f"FINISHED FLOW EXECUTION"))

            # Reset flow for subsequent iterations
            start, stop = None, None

            if optimize.iter >= PAR.END:
                break

            optimize.iter += 1
            self.logger.info(
                msg.sub(f"INCREMENT ITERATION TO {optimize.iter}"))

        self.logger.info(msg.mjr("FINISHED INVERSION WORKFLOW"))

    def setup(self):
        """
        Lays groundwork for inversion by running setup() functions for the 
        involved sub-modules, generating True model synthetic data if necessary,
        and generating the pre-requisite database files.

        .. note::
            This function should only be run one time, at the start of iter 1
        """
        # Iter check is done inside setup() so that we can include fx in FLOW
        if optimize.iter == 1:
            # Set up all the requisite modules from the master job
            self.logger.info(msg.mnr("PERFORMING MODULE SETUP"))
            preprocess.setup()
            postprocess.setup()
            optimize.setup()

            # Run solver.setup() in parallel
            self.logger.info("setting up solver on system...")
            system.run("solver", "setup")

    def initialize(self):
        """
        Generates synthetics via a forward simulation, calculates misfits
        for the forward simulation. Writes misfit for use in optimization.
        """
        self.logger.info(msg.mjr("INITIALIZING INVERSION"))
        self.evaluate_function(path=PATH.GRAD, suffix="new")

    def compute_direction(self):
        """
        Computes search direction
        """
        self.logger.info(msg.mnr("COMPUTING SEARCH DIRECTION"))
        optimize.compute_direction()

    def line_search(self):
        """
        Conducts line search in given search direction

        Status codes:
            status > 0  : finished
            status == 0 : not finished
            status < 0  : failed
        """
        # Calculate the initial step length based on optimization algorithm
        if optimize.line_search.step_count == 0:
            self.logger.info(
                msg.mjr(f"CONDUCTING LINE SEARCH "
                        f"({optimize.eval_str})"))
            optimize.initialize_search()

        # Attempt a new trial step with the given step length
        optimize.line_search.step_count += 1
        self.logger.info(msg.mnr(f"TRIAL STEP COUNT: {optimize.eval_str}"))
        self.evaluate_function(path=PATH.FUNC, suffix="try")

        # Check the function evaluation against line search history
        status = optimize.update_search()

        # Proceed based on the outcome of the line search
        if status > 0:
            self.logger.info("trial step successful")
            # Save outcome of line search to disk; reset step to 0 for next iter
            optimize.finalize_search()
            return
        elif status == 0:
            self.logger.info("retrying with new trial step")
            # Recursively call this function to attempt another trial step
            self.line_search()
        elif status < 0:
            if optimize.retry_status():
                self.logger.info("line search failed. restarting line search")
                # Reset the line search machinery; set step count to 0
                optimize.restart()
                self.line_search()
            else:
                self.logger.info("line search failed. aborting inversion.")
                sys.exit(-1)

    def evaluate_function(self, path, suffix):
        """
        Performs forward simulation, and evaluates the objective function

        :type path: str
        :param path: path in the scratch directory to use for I/O
        :type suffix: str
        :param suffix: suffix to use for I/O
        """
        self.logger.info(msg.sub("EVALUATE OBJECTIVE FUNCTION"))

        # Ensure that we are referencing the same tags as defined in OPTIMIZE
        model_tag = getattr(optimize, f"m_{suffix}")
        misfit_tag = getattr(optimize, f"f_{suffix}")

        self.write_model(path=path, tag=model_tag)

        self.logger.debug(f"evaluating objective function {PAR.NTASK} times "
                          f"on system...")
        system.run("solver", "eval_func", path=path)

        self.write_misfit(path=path, tag=misfit_tag)

    def evaluate_gradient(self, path=None):
        """
        Performs adjoint simulation to retrieve the gradient of the objective
        function.
        """
        self.logger.info(msg.mnr("EVALUATING GRADIENT"))

        self.logger.debug(
            f"evaluating gradient {PAR.NTASK} times on system...")
        system.run("solver",
                   "eval_grad",
                   path=path or PATH.GRAD,
                   export_traces=PAR.SAVETRACES)

    def finalize(self):
        """
        Saves results from current model update iteration and increment the
        iteration number to set up for the next iteration. Finalization is
        expected to be the LAST function in workflow.main()'s flow list.
        """
        self.logger.info(msg.mjr(f"FINALIZING ITERATION {optimize.iter}"))

        self.checkpoint()
        preprocess.finalize()

        # Save files from scratch before discarding
        if PAR.SAVEMODEL:
            self.save_model()

        if PAR.SAVEGRADIENT:
            self.save_gradient()

        if PAR.SAVEKERNELS:
            self.save_kernels()

        if PAR.SAVETRACES:
            self.save_traces()

        if PAR.SAVERESIDUALS:
            self.save_residuals()

    def clean(self):
        """
        Cleans directories in which function and gradient evaluations were
        carried out
        """
        self.logger.info(msg.mnr("CLEANING WORKDIR FOR NEXT ITERATION"))

        unix.rm(PATH.GRAD)
        unix.rm(PATH.FUNC)
        unix.mkdir(PATH.GRAD)
        unix.mkdir(PATH.FUNC)

    def checkpoint(self):
        """
        Writes information to disk so workflow can be resumed following a break
        """
        save()

    def write_model(self, path, tag):
        """
        Writes model in format expected by solver

        :type path: str
        :param path: path to write the model to
        :type tag: str
        :param tag: name of the model to be saved, usually tagged as 'm' with
            a suffix depending on where in the inversion we are. e.g., 'm_try'.
            Expected that these tags are defined in OPTIMIZE module
        """
        src = tag
        dst = os.path.join(path, "model")
        self.logger.debug(f"saving model '{src}' to:\n{dst}")
        solver.save(solver.split(optimize.load(src)), dst)

    def write_gradient(self):
        """
        Writes gradient in format expected by non-linear optimization library.
        Calls the postprocess module, which will smooth/precondition gradient.
        """
        self.logger.info(msg.mnr("POSTPROCESSING KERNELS"))
        src = os.path.join(PATH.GRAD, "gradient")
        dst = f"g_new"

        postprocess.write_gradient(PATH.GRAD)
        parts = solver.load(src, suffix="_kernel")

        optimize.save(dst, solver.merge(parts))

    def write_misfit(self, path, tag):
        """
        Writes misfit in format expected by nonlinear optimization library.
        Collects all misfit values within the given residuals directory and sums
        them in a manner chosen by the preprocess class.

        :type path: str
        :param path: path to write the misfit to
        :type tag: str
        :param tag: name of the model to be saved, usually tagged as 'f' with
            a suffix depending on where in the inversion we are. e.g., 'f_try'.
            Expected that these tags are defined in OPTIMIZE module
        """
        self.logger.info("summing residuals with preprocess module")
        src = glob(os.path.join(path, "residuals", "*"))
        dst = tag
        total_misfit = preprocess.sum_residuals(src)

        self.logger.debug(f"saving misfit {total_misfit:.3E} to tag '{dst}'")
        optimize.savetxt(dst, total_misfit)

    def save_gradient(self):
        """
        Save the gradient vector. Allows saving numpy array or standard
        Fortran .bin files

        Saving as a vector saves on file count, but requires numpy and seisflows
        functions to read
        """
        dst = os.path.join(PATH.OUTPUT, f"gradient_{optimize.iter:04d}")

        if PAR.SAVEAS in ["binary", "both"]:
            src = os.path.join(PATH.GRAD, "gradient")
            unix.mv(src, dst)
        if PAR.SAVEAS in ["vector", "both"]:
            src = os.path.join(PATH.OPTIMIZE, optimize.g_old)
            unix.cp(src, dst + ".npy")

        self.logger.debug(f"saving gradient to path:\n{dst}")

    def save_model(self):
        """
        Save the model vector. Allows saving numpy array or standard
        Fortran .bin files

        Saving as a vector saves on file count, but requires numpy and seisflows
        functions to read
        """
        src = optimize.m_new
        dst = os.path.join(PATH.OUTPUT, f"model_{optimize.iter:04d}")

        self.logger.debug(f"saving model '{src}' to path:\n{dst}")

        if PAR.SAVEAS in ["binary", "both"]:
            solver.save(solver.split(optimize.load(src)), dst)
        if PAR.SAVEAS in ["vector", "both"]:
            np.save(file=dst, arr=optimize.load(src))

    def save_kernels(self):
        """
        Save the kernel vector as a Fortran binary file on disk
        """
        src = os.path.join(PATH.GRAD, "kernels")
        dst = os.path.join(PATH.OUTPUT, f"kernels_{optimize.iter:04d}")

        self.logger.debug(f"saving kernels to path:\n{dst}")

        unix.mv(src, dst)

    def save_traces(self):
        """
        Save the waveform traces to disk.

        !!! This doesn't work? Traces are not saved to PATH.GRAD so src does
        !!! not exist
        """
        src = os.path.join(PATH.GRAD, "traces")
        dst = os.path.join(PATH.OUTPUT, f"traces_{optimize.iter:04d}")

        self.logger.debug(f"saving traces to path:\n{dst}")

        unix.mv(src, dst)

    def save_residuals(self):
        """
        Save the residuals to disk
        """
        src = os.path.join(PATH.GRAD, "residuals")
        dst = os.path.join(PATH.OUTPUT, f"residuals_{optimize.iter:04d}")

        self.logger.debug(f"saving residuals to path:\n{dst}")

        unix.mv(src, dst)
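
The main() loop above relies on simple tuple slicing: check_stop_resume_cond()
may return start/stop indices so the first pass through the while-loop begins
or ends part-way through the flow tuple, while every later iteration runs the
full tuple. A hedged, self-contained sketch of that control flow (function and
argument names here are placeholders, not the SeisFlows3 API):

def run_iterations(flow, begin, end, start=None, stop=None):
    """Toy version of Inversion.main(): slice `flow` on the first pass,
    then run the whole tuple for every remaining iteration."""
    iteration = begin
    while True:
        for func in flow[start:stop]:
            func()
        start, stop = None, None  # only the first pass is sliced
        if iteration >= end:
            break
        iteration += 1

# run_iterations((lambda: print("setup"), lambda: print("update")),
#                begin=1, end=2)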
Example #15
class Cluster(custom_import("system", "base")):
    """
    Abstract base class for the Systems module which controls interaction with
    compute systems such as HPC clusters.
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        # Define the Parameters required by this module
        sf.par("WALLTIME",
               required=True,
               par_type=float,
               docstr="Maximum job time in minutes for main SeisFlows3 job")

        sf.par("TASKTIME",
               required=True,
               par_type=float,
               docstr="Maximum job time in minutes for each SeisFlows3 task")

        sf.par("NTASK",
               required=True,
               par_type=int,
               docstr="Number of separate, individual tasks. Also equal to "
               "the number of desired sources in workflow")

        sf.par("NPROC",
               required=True,
               par_type=int,
               docstr="Number of processor to use for each simulation")

        sf.par("ENVIRONS",
               required=False,
               default="",
               par_type=str,
               docstr="Optional environment variables to be provided in the"
               "following format VAR1=var1,VAR2=var2... Will be set"
               "using os.environs")

        return sf

    def check(self, validate=True):
        """
        Checks parameters and paths
        """
        if validate:
            self.required.validate()

        super().check(validate=False)

    def submit(self, submit_call):
        """
        Main insertion point of SeisFlows3 onto the compute system.

        .. rubric::
            $ seisflows submit

        .. note::
            The expected behavior of the submit() function is to:
            1) run system setup, creating directory structure,
            2) execute workflow by submitting workflow.main()

        :type submit_call: str
        :param submit_call: the command line workload manager call to be run by
            subprocess. These need to be passed in by specific workload manager
            subclasses.
        """
        self.setup()
        workflow = sys.modules["seisflows_workflow"]
        workflow.checkpoint()

        # check==True: subprocess will wait for workflow.main() to finish
        subprocess.run(submit_call, shell=True, check=True)

    def run(self, classname, method, **kwargs):
        """
        Runs a task multiple times in parallel

        .. note::
            The expected behavior of the run() function is to: submit N jobs to
            the system in parallel. For example, in a simulation step, run()
            submits N jobs to the compute system where N is the number of
            events requiring an adjoint simulation.

        :rtype: None
        :return: This function is not expected to return anything
        """
        raise NotImplementedError('Must be implemented by subclass.')

    def taskid(self):
        """
        Provides a unique identifier for each running task. This is
        compute system specific.

        :rtype: int
        :return: this function is expected to return a unique numerical
            identifier.
        """
        raise NotImplementedError('Must be implemented by subclass.')
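
The ENVIRONS parameter above is a plain comma-separated string, so somewhere
downstream it has to be split into key=value pairs before being exported. A
minimal sketch of that parsing (this helper is illustrative only, not the
SeisFlows3 implementation):

import os


def export_environs(environs_str):
    """Split 'VAR1=var1,VAR2=var2' into pairs and place them in os.environ."""
    for pair in environs_str.split(","):
        if not pair.strip():
            continue  # tolerate an empty string or trailing commas
        key, val = pair.split("=", maxsplit=1)
        os.environ[key.strip()] = val.strip()

# export_environs("OMP_NUM_THREADS=4,TMPDIR=/scratch/tmp")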
Example #16
class Slurm(custom_import("system", "cluster")):
    """
    Generalized interface for submitting jobs to and interfacing with a SLURM
    workload management system.
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized as NoneTypes for clarity and docstrings.
        """
        super().__init__()

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        sf.par("MPIEXEC",
               required=False,
               default="srun -u",
               par_type=str,
               docstr="Function used to invoke executables on the system. "
               "For example 'srun' on SLURM systems, or './' on a "
               "workstation. If left blank, will guess based on the "
               "system.")

        sf.par("NTASKMAX",
               required=False,
               default=100,
               par_type=int,
               docstr="Limit on the number of concurrent tasks in array")

        sf.par("NODESIZE",
               required=True,
               par_type=int,
               docstr="The number of cores per node defined by the system")

        sf.par("SLURMARGS",
               required=False,
               default="",
               par_type=str,
               docstr="Any optional, additional SLURM arguments that will be "
               "passed to the SBATCH scripts")

        return sf

    def submit(self, submit_call=None):
        """
        Submits workflow as a single process master job

        :type submit_call: str
        :param submit_call: subclasses (e.g., specific SLURM cluster subclasses)
            can overload the sbatch command line input by setting
            submit_call. If set to None, default submit_call will be set here.
        """
        if submit_call is None:
            submit_call = " ".join([
                f"sbatch", f"{PAR.SLURMARGS or ''}", f"--job-name={PAR.TITLE}",
                f"--output={self.output_log}", f"--error={self.error_log}",
                f"--ntasks-per-node={PAR.NODESIZE}", f"--nodes=1",
                f"--time={PAR.WALLTIME:d}",
                f"{os.path.join(ROOT_DIR, 'scripts', 'submit')}",
                "--output {PATH.OUTPUT}"
            ])
            self.logger.debug(submit_call)

        super().submit(submit_call)

    def run(self, classname, method, single=False, run_call=None, **kwargs):
        """
        Runs task multiple times in embarrassingly parallel fashion on a SLURM
        cluster. Executes classname.method(*args, **kwargs) `NTASK` times,
        each time on `NPROC` CPU cores

        .. note::
            The actual CLI call structure looks something like this
            $ sbatch --args scripts/run OUTPUT class method environs

        :type classname: str
        :param classname: the class to run
        :type method: str
        :param method: the method from the given `classname` to run
        :type single: bool
        :param single: run a single-process, non-parallel task, such as
            smoothing the gradient, which only needs to be run once.
            This will change how the job array and the number of tasks is
            defined, such that the job is submitted as a single-core job to
            the system.
        :type run_call: str
        :param run_call: subclasses (e.g., specific SLURM cluster subclasses)
            can overload the sbatch command line input by setting
            run_call. If set to None, default run_call will be set here.
        """
        self.checkpoint(PATH.OUTPUT, classname, method, kwargs)

        # Default sbatch command line input, can be overloaded by subclasses
        # Copy-paste this default run_call and adjust accordingly for subclass
        if run_call is None:
            run_call = " ".join([
                "sbatch", f"{PAR.SLURMARGS or ''}", f"--job-name={PAR.TITLE}",
                f"--nodes={math.ceil(PAR.NPROC/float(PAR.NODESIZE)):d}",
                f"--ntasks-per-node={PAR.NODESIZE:d}",
                f"--ntasks={PAR.NPROC:d}", f"--time={PAR.TASKTIME:d}",
                f"--output={os.path.join(PATH.WORKDIR, 'logs', '%A_%a')}",
                f"--array=0-{PAR.NTASK-1 % PAR.NTASKMAX}",
                f"{os.path.join(ROOT_DIR, 'scripts', 'run')}",
                f"--output {PATH.OUTPUT}", f"--classname {classname}",
                f"--funcname {method}", f"--environment {PAR.ENVIRONS or ''}"
            ])
            self.logger.debug(run_call)

        # Single-process jobs simply need to replace a few sbatch arguments.
        # Do it AFTER `run_call` has been defined so that subclasses submitting
        # custom run calls can still benefit from this
        if single:
            self.logger.info("replacing parts of sbatch run call for single "
                             "process job")
            for part in run_call.split(" "):
                if "--array" in part:
                    run_call.replace(part, "--array=0-0")
                elif "--ntasks" in part:
                    run_call.replace(part, "--ntasks=1")
            # Append taskid to environment variable, deal with the case where
            # PAR.ENVIRONS is an empty string
            task_id_str = "SEISFLOWS_TASKID=0"
            if not run_call.strip().endswith("--environment"):
                task_id_str = f",{task_id_str}"  # appending to the list of vars
            run_call += task_id_str
            self.logger.debug(run_call)

        # The standard response from SLURM when submitting jobs
        # is something like 'Submitted batch job 441636', we want job number
        stdout = subprocess.run(run_call,
                                stdout=subprocess.PIPE,
                                text=True,
                                shell=True).stdout
        job_ids = job_id_list(stdout, single)

        # Continuously check for job completion on ALL running array jobs
        is_done = False
        count = 0
        bad_states = [
            "TIMEOUT", "FAILED", "NODE_FAIL", "OUT_OF_MEMORY", "CANCELLED"
        ]
        while not is_done:
            # Wait a bit to avoid rapidly querying sacct
            time.sleep(5)
            is_done, states = job_array_status(job_ids)
            # EXIT CONDITION: if any of the jobs provide job failure codes
            if not is_done:
                for i, state in enumerate(states):
                    # Sometimes states can be something like 'CANCELLED+', so
                    # we can't do exact string matching, check partial matches
                    if any([check in state for check in bad_states]):
                        print(
                            msg.cli((f"Stopping workflow for {state} job. "
                                     f"Please check log file for details."),
                                    items=[
                                        f"TASK:    {classname}.{method}",
                                        f"TASK ID: {job_ids[i]}",
                                        f"LOG:     logs/{job_ids[i]}",
                                        f"SBATCH:  {run_call}"
                                    ],
                                    header="slurm run error",
                                    border="="))
                        sys.exit(-1)
            # WAIT CONDITION: if sacct is not working, we'll get stuck in a loop
            if "UNDEFINED" in states:
                count += 1
                # Every 10 counts, warn the user this is unexpected behavior
                if not count % 10:
                    job_id = job_ids[states.index("UNDEFINED")]
                    self.logger.warning(
                        f"SLURM command 'sacct {job_id}' has "
                        f"returned unexpected response {count} "
                        f"times. This job may have failed "
                        f"unexpectedly. Consider checking "
                        f"manually")

        self.logger.info(f"Task {classname}.{method} finished successfully")

    def taskid(self):
        """
        Provides a unique identifier for each running task

        :rtype: int
        :return: identifier for a given task
        """
        # If not set, this environment variable will return None
        sftaskid = os.getenv("SEISFLOWS_TASKID")

        if sftaskid is None:
            sftaskid = os.getenv("SLURM_ARRAY_TASK_ID")
            if sftaskid is None:
                print(
                    msg.cli(
                        "system.taskid() environment variable not found. "
                        "Assuming DEBUG mode and returning taskid==0. "
                        "If not DEBUG mode, please check SYSTEM.run()",
                        header="warning",
                        border="="))
                sftaskid = 0

        return int(sftaskid)
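
job_id_list() and job_array_status() are referenced above but not shown in
this excerpt. The first one's job is to pull the numeric id out of sbatch's
"Submitted batch job 441636" response and, for array jobs, expand it into one
"<id>_<index>" entry per task (the format sacct reports). A rough sketch under
those assumptions, not the SeisFlows3 implementation:

def parse_job_ids(sbatch_stdout, ntask, single=False):
    """Illustrative parser for sbatch's 'Submitted batch job <id>' response;
    array tasks are tracked as '<id>_0', '<id>_1', ..."""
    job_id = sbatch_stdout.strip().split()[-1]
    if single or ntask == 1:
        return [f"{job_id}_0"]
    return [f"{job_id}_{i}" for i in range(ntask)]

# parse_job_ids("Submitted batch job 441636", ntask=3)
# -> ['441636_0', '441636_1', '441636_2']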
Example #17
File: test.py  Project: bch0w/seisflows
class Test(custom_import("workflow", "base")):
    """
    This is a template Base class
    """
    # Class-specific logger accessed using self.logger
    # When this logger is called, e.g., self.logger.info("text"), the logging
    # package will know exactly which module, class and function the log
    # statement has been sent from, extraordinarily helpful for debugging.
    logger = logging.getLogger(__name__).getChild(__qualname__)

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.

        :rtype: seisflows.config.SeisFlowsPathsParameters
        :return: Paths and parameters that define the given class

        """
        sf = SeisFlowsPathsParameters(super().required)

        return sf

    def check(self, validate=True):
        """
        Checks parameters and paths. The validate function ensures that all
        required paths and parameters are accounted for, and that all
        optional paths and parameters are set to user-defined or default values.

        :type validate: bool
        :param validate: set required paths and parameters into sys.modules
        """
        # The validate statement is used internally to set required paths
        # and parameters into sys.modules. Default values are stored for
        # optional terms
        if validate:
            self.required.validate()

    def main(self, return_flow=False):
        """
        This controls the main testing workflow
        """
        FLOW = [self.test_system]
        if return_flow:
            return FLOW

        for func in FLOW:
            func()

    def test_function(self):
        """
        A simple function that can be called by system.run()
        """
        print(f"Hello world, from taskid {system.taskid()}")

    def test_system(self):
        """
        This is an example test function which can take any number of args
        or kwargs. The base class is responsible for setting all of the
        necessary functions
        """
        system.run(classname="workflow", method="test_function")
        # Wait a bit for system to catch up
        time.sleep(3)
        system.run(classname="workflow", method="test_function", single=True)
Example #18
File: lsf.py  Project: bch0w/seisflows
class Lsf(custom_import("system", "cluster")):
    """
    An interface through which to submit workflows, run tasks in serial or
    parallel, and perform other system functions.

    By hiding environment details behind a python interface layer, these
    classes provide a consistent command set across different computing
    environments.

    Intermediate files are written to a global scratch path PATH.SCRATCH,
    which must be accessible to all compute nodes.

    Optionally, users can provide a local scratch path PATH.LOCAL if each
    compute node has its own local filesystem.

    For important additional information, please see
    http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration
    """
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized as NoneTypes for clarity and docstrings.
        """
        super().__init__()

    @property
    def required(self):
        """
        Checks parameters and paths
        """
        sf = SeisFlowsPathsParameters(super().required)

        sf.par("MPIEXEC",
               required=False,
               default="mpiexec",
               par_type=str,
               docstr="Function used to invoke executables on the system. "
               "For example 'srun' on SLURM systems, or './' on a "
               "workstation. If left blank, will guess based on the "
               "system.")

        # Define the Parameters required by this module
        sf.par("NTASKMAX",
               required=False,
               default=100,
               par_type=int,
               docstr="Limit on the number of concurrent tasks in array")

        sf.par("NODESIZE",
               required=True,
               par_type=int,
               docstr="The number of cores per node defined by the system")

        sf.par("LSFARGS",
               required=False,
               default="",
               par_type=str,
               docstr="Any optional, additional LSG arguments that will be "
               "passed to the LSF submit scripts")

    def submit(self, workflow):
        """
        Submits workflow
        """
        # Prepare 'bsub' arguments
        submit_call = " ".join([
            f"bsub", f"{PAR.LSFARGS}", f"-J {PAR.TITLE}",
            f"-o {self.output_log}.log", f"-e {self.error_log}.log",
            f"-n {PAR.NODESIZE}", f'-R "span[ptile={PAR.NODESIZE}"',
            f"-W {PAR.WALLTIME:d}:00",
            os.path.join(findpath("seisflows.system"), "wrappers",
                         "submit"), PATH.OUTPUT
        ])

        super().submit(workflow, submit_call)

    def run(self, classname, method, *args, **kwargs):
        """
        Runs task multiple times in embarrassingly parallel fashion on an
        LSF cluster

        Executes classname.method(*args, **kwargs) NTASK times,
        each time on NPROC CPU cores

        :type classname: str
        :param classname: the class to run
        :type method: str
        :param method: the method from the given `classname` to run
        """
        # Checkpoint this individual method before proceeding
        self.checkpoint(PATH.OUTPUT, classname, method, args, kwargs)

        # Submit job array
        run_call = " ".join([
            f"bsub", f"{PAR.LSFARGS}", f"-J {PAR.TITLE}", f"-n {PAR.NPROC}",
            f'-R "span[ptile={PAR.NODESIZE}"', f"-W {PAR.TASKTIME:d}:00",
            f"-o {os.path.join(PATH.WORKDIR, 'output.logs', '%J_%I')}",
            f"[1-{PAR.NTASK}] % {PAR.NTASKMAX}",
            f"{os.path.join(findpath('seisflows.system'), 'wrappers', 'run')}",
            f"{PATH.OUTPUT}", f"{classname}", f"{method}", f"{PAR.ENVIRONS}"
        ])

        stdout = subprocess.check_output(run_call, shell=True)

        # keep track of job ids
        jobs = self.job_id_list(stdout, PAR.NTASK)

        while True:
            # Wait seconds before checking status again
            time.sleep(30)
            self.timestamp()
            isdone, jobs = self.job_status(classname, method, jobs)
            if isdone:
                return

    def run_single(self, classname, method, *args, **kwargs):
        """ Runs task multiple times in embarrassingly parallel fasion

          Executes classname.method(*args, **kwargs) NTASK times, each time on
          NPROC cpu cores
        """
        # Checkpoint this individual method before proceeding
        self.checkpoint(PATH.OUTPUT, classname, method, args, kwargs)

        # Submit job array
        run_call = " ".join([
            f"bsub", f"{PAR.LSFARGS}", f"-J {PAR.TITLE}", f"-n {PAR.NPROC}",
            f'-R "span[ptile={PAR.NODESIZE}"', f"-W {PAR.TASKTIME:d}:00",
            f"-o {os.path.join(PATH.WORKDIR, 'output.logs', '%J')}", f"[1-1]",
            f"{os.path.join(findpath('seisflows.system'), 'wrappers', 'run')}",
            f"{PATH.OUTPUT}", f"{classname}", f"{method}", f"{PAR.ENVIRONS}"
        ])

        stdout = subprocess.check_output(run_call, shell=True)

        # keep track of job ids
        jobs = self.job_id_list(stdout, ntask=1)

        while True:
            # Wait seconds before checking status again
            time.sleep(30)
            self.timestamp()
            isdone, jobs = self.job_status(classname, method, jobs)
            if isdone:
                return

    def job_id_list(self, stdout, ntask):
        """
        Parses job id list from bsub standard output

        :type stdout: str
        :param stdout: the output of subprocess.check_output()
        :type ntask: int
        :param ntask: number of tasks currently running
        """
        job = stdout.split()[1].strip()[1:-1]
        if ntask == 1:
            return [job]
        else:
            number_jobs = range(1, ntask + 1)
            return [f"{job}[{_}]" for _ in number_jobs]

    def job_status(self, classname, method, jobs):
        """
        Queries completion status of a list of running jobs

        :type jobs: list
        :param jobs: job ids to query
        """
        job_finished = []
        for job in jobs:
            state = self._query(job)
            if state == "DONE":
                job_finished.append(True)
            else:
                job_finished.append(False)

            if state == "EXIT":
                print(
                    msg.cli(
                        f"LSF job {job} failed to execute "
                        f"{classname}.{method}.",
                        header="error",
                        border="="))
                sys.exit(-1)

        isdone = all(job_finished)

        return isdone, jobs

    def _query(self, jobid):
        """
        Retrieves job state from the LSF database

        :type jobid: str
        :param jobid: job id to query LSF system about
        """
        # Write the job status output to a temporary file
        with open(os.path.join(PATH.SYSTEM, "job_status", "w")) as f:
            call('bjobs -a -d "{jobid}"', stdout=f)

        # Read the job status back from the text file
        with open(os.path.join(PATH.SYSTEM, "job_status", "r")) as f:
            lines = f.readlines()
            state = lines[1].split()[2].strip()

        return state

    def taskid(self):
        """
        Provides a unique identifier for each running task
        """
        return int(os.getenv('LSB_JOBINDEX')) - 1

    def timestamp(self):
        """
        Timestamp the current running job
        """
        with open(os.path.join(PATH.SYSTEM, "timestamps", "a")) as f:
            f.write(time.strftime("%H:%M:%S"))
            f.write("\n")

    def save_kwargs(self, classname, method, kwargs):
        """
        Save key word arguments as a pickle object.

        :type classname: str
        :param classname: the class to run
        :type method: str
        :param method: the method from the given `classname` to run
        """
        kwargspath = os.path.join(PATH.OUTPUT, "kwargs")
        kwargsfile = os.path.join(kwargspath, f"{classname}_{method}.p")

        unix.mkdir(kwargspath)
        saveobj(kwargsfile, kwargs)
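
_query() above assumes the "bjobs -a -d" report has a one-line header followed
by the job row, with the state in the third column. A standalone sketch of
that parsing on a made-up report (the exact column layout depends on the LSF
installation, so treat this as illustrative only):

def parse_bjobs_state(report):
    """Return the STAT column of the first job row in a bjobs report."""
    lines = report.strip().splitlines()
    return lines[1].split()[2]

example_report = (
    "JOBID   USER   STAT  QUEUE   FROM_HOST  EXEC_HOST  JOB_NAME   SUBMIT_TIME\n"
    "123456  bchow  DONE  normal  login01    node042    seisflows  Jan 1 00:00"
)
# parse_bjobs_state(example_report) -> 'DONE'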
Example #19
class Pyatoa(custom_import("preprocess", "base")):
    """
    Data preprocessing class using the Pyaflowa class within the Pyatoa package.
    In charge of data discovery, preprocessing, filtering, misfit
    quantification and data storage. The User does not need to implement Pyatoa,
    but rather interacts with it via the parameters and paths of SeisFlows3.
    """
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by __init__!
        Attributes are just initialized as NoneTypes for clarity and docstrings

        :param logger: Class-specific logging module, log statements pushed
            from this logger will be tagged by its specific module/classname
        """
        pass

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters()

        # Define the Parameters required by this module
        sf.par("UNIT_OUTPUT", required=True, par_type=str,
               docstr="Data units. Must match the synthetic output of external "
                      "solver. Available: ['DISP': displacement, "
                      "'VEL': velocity, 'ACC': acceleration]")

        # TODO Check this against T0 in check()
        sf.par("START_PAD", required=False, default=0, par_type=float,
               docstr="For data gathering; time before origin time to gather. "
                      "START_PAD >= T_0 in SPECFEM constants.h.in. "
                      "Positive values only")

        # TODO set this automatically by setting equal NT * DT
        sf.par("END_PAD", required=True, par_type=float,
               docstr="For data gathering; time after origin time to gather. "
                      "END_PAD >= NT * DT (of Par_file). Positive values only")

        sf.par("MIN_PERIOD", required=False, default="", par_type=float,
               docstr="Minimum filter corner in unit seconds. Bandpass filter "
                      "if set with `MAX_PERIOD`, highpass filter if set "
                      "without `MAX_PERIOD`, no filtering if not set and "
                      "`MAX_PERIOD also not set")

        sf.par("MAX_PERIOD", required=False, default="", par_type=float,
               docstr="Maximum filter corner in unit seconds. Bandpass filter "
                      "if set with `MIN_PERIOD`, lowpass filter if set "
                      "without `MIN_PERIOD`, no filtering if not set and "
                      "`MIN_PERIOD also not set")

        sf.par("CORNERS", required=False, default=4, par_type=int,
               docstr="Number of filter corners applied to filtering")

        sf.par("CLIENT", required=False, par_type=str,
               docstr="Client name for ObsPy FDSN data gathering. Pyatoa will "
                      "attempt to collect waveform and metadata based on "
                      "network and station codes provided in the SPECFEM "
                      "STATIONS file. If set None, no FDSN gathering will be "
                      "attempted")

        sf.par("ROTATE", required=False, default=False, par_type=bool,
               docstr="Attempt to rotate waveform components from NEZ -> RTZ")

        sf.par("PYFLEX_PRESET", required=False, default="default", 
               par_type=str,
               docstr="Parameter map for misfit window configuration defined "
                      "by Pyflex. IF None, misfit and adjoint sources will be "
                      "calculated on whole traces. For available choices, "
                      "see Pyatoa docs page (pyatoa.rtfd.io)")

        sf.par("FIX_WINDOWS", required=False, default=False,
               par_type="bool or str",
               docstr="How to address misfit window evaluation at each "
                      "evaluation. Options to re-use misfit windows collected "
                      "during an inversion, available options: "
                      "[True, False, 'ITER', 'ONCE'] "
                      "True: Re-use windows after first evaluation (i01s00); "
                      "False: Calculate new windows each evaluation; "
                      "'ITER': Calculate new windows at first evaluation of "
                      "each iteration (e.g., i01s00... i02s00..."
                      "'ONCE': Calculate new windows at first evaluation of "
                      "the workflow, i.e., at PAR.BEGIN")

        sf.par("ADJ_SRC_TYPE", required=False, default="cc",  par_type=str,
               docstr="Adjoint source type to evaluate misfit, defined by "
                      "Pyadjoint. Currently available options: "
                      "['cc': cross-correlation, 'mt': multitaper, "
                      "wav: waveform']")

        sf.par("PLOT", required=False, default=True, par_type=bool,
               docstr="Attempt to plot waveforms and maps as PDF files at each "
                      "function evaluation")

        sf.par("PYATOA_LOG_LEVEL", required=False, default="DEBUG", 
               par_type=str,
               docstr="Log level to set Pyatoa, Pyflex, Pyadjoint. Available: "
                      "['null': no logging, 'warning': warnings only, "
                      "'info': task tracking, "
                      "'debug': log all small details (recommended)]")

        # Parameters to control saving scratch/preprocess files to work dir.
        sf.par("SAVE_DATASETS", required=False, default=True, par_type=bool,
               docstr="Save PyASDF HDF5 datasets to disk. These datasets store "
                      "waveform data, metadata, misfit windows, adjoint "
                      "sources and configuration parameters")

        sf.par("SAVE_FIGURES", required=False, default=True, par_type=bool,
               docstr="Save output waveform figures to disk as PDFs")

        sf.par("SAVE_LOGS", required=False, default=True, par_type=bool,
               docstr="Save event-specific Pyatoa logs to disk as .txt files")

        # Define the Paths required by this module
        sf.path("PREPROCESS", required=False,
                default=os.path.join(PATH.SCRATCH, "preprocess"),
                docstr="scratch/ path to store waveform data and figures. "
                       "Pyatoa will generate an internal directory structure "
                       "here")

        sf.path("DATA", required=False,
                docstr="Directory to locally stored data. Pyatoa looks for "
                       "waveform and metadata in the 'PATH.DATA/mseed' and "
                       "'PATH.DATA/seed', directories respectively.")

        return sf

    def check(self, validate=True):
        """ 
        Checks Parameter and Path files, will be run at the start of a Seisflows
        workflow to ensure that things are set appropriately.
        """
        if validate:
            self.required.validate()

        # Check that other modules have set parameters that will be used here
        for required_parameter in ["COMPONENTS", "FORMAT"]:
            assert(required_parameter in PAR), \
                f"Pyatoa requires {required_parameter}"

        assert(PAR.FORMAT.upper() == "ASCII"), \
            "Pyatoa preprocess requires PAR.FORMAT=='ASCII'"

        assert((PAR.DT * PAR.NT) <= (PAR.START_PAD + PAR.END_PAD)), \
            ("Pyatoa preprocess must have (PAR.START_PAD + PAR.END_PAD) >= "
             "(PAR.DT * PAR.NT), current values will not provide sufficiently "
             f"long data traces (DT*NT={PAR.DT * PAR.NT}; "
             f"START+END={PAR.START_PAD + PAR.END_PAD}")

    def setup(self):
        """
        Sets up data preprocessing machinery by establishing an internally
        defined directory structure that will be used to store the outputs 
        of the preprocessing workflow

        Akin to an __init__ class, but to be called externally by the workflow.
        """
        unix.mkdir(PATH.PREPROCESS)

    def prepare_eval_grad(self, cwd, source_name, taskid, **kwargs):
        """
        Prepare the gradient evaluation by gathering, preprocessing waveforms, 
        and measuring misfit between observations and synthetics using Pyatoa.

        Reads in observed and synthetic waveforms, applies optional
        preprocessing, assesses misfit, and writes out adjoint sources and
        STATIONS_ADJOINT file.

        .. note::
            Meant to be called by solver.eval_func(), may have unused arguments
            to keep functions general across preprocessing subclasses.

        :type cwd: str
        :param cwd: current specfem working directory containing observed and
            synthetic seismic data to be read and processed. Should be defined
            by solver.cwd
        :type source_name: str
        :param source_name: the event id to be used for tagging and data lookup.
            Should be defined by solver.source_name
        :type taskid: int
        :param taskid: identifier of the currently running solver instance.
            Should be defined by solver.taskid
        :type filenames: list of str
        :param filenames: [not used] list of filenames defining the files in
            traces
        """
        if taskid == 0:
            self.logger.debug("preparing files for gradient evaluation with "
                              "Pyaflowa")

        # Process all the stations for a given event using Pyaflowa
        pyaflowa = self.setup_event_pyaflowa(source_name)
        scaled_misfit = pyaflowa.process()

        if scaled_misfit is None:
            print(msg.cli(f"Event {source_name} returned no misfit, you may "
                          f"want to check logs and waveform figures, "
                          f"or consider discarding this event from your "
                          f"workflow", 
                          items=[pyaflowa.paths.logs, pyaflowa.paths.figures],
                          header="pyatoa preprocessing error", border="="))
            sys.exit(-1)

        # Event misfit defined by Tape et al. (2010) written to solver dir.
        self.write_residuals(path=cwd, scaled_misfit=scaled_misfit)

    def setup_event_pyaflowa(self, source_name=None):
        """
        A convenience function to set up a Pyaflowa processing instance for
        a specific event. 

        .. note::
            This is meant to be called by preprocess.prepare_eval_grad() but its
            also useful for debugging and manual processing where you can simply
            return a formatted Pyaflowa object and debug it directly.

        :type source_name: str
        :param source_name: solver source name to evaluate setup for. Must 
            match from list defined by: solver.source_names
        """
        # Late import because preprocess is loaded before optimize; optimize
        # is required to know which iteration/step_count we are at
        solver = sys.modules["seisflows_solver"]
        optimize = sys.modules["seisflows_optimize"]

        iteration = optimize.iter
        if source_name is None:
            source_name = solver.source_names[0]

        # Deal with the migration case where no step count given
        try:
            step_count = optimize.line_search.step_count
        except AttributeError:
            step_count = ""

        # Outsource data processing to an event-specific Pyaflowa instance
        pyaflowa = Pyaflowa(sfpar=PAR, sfpath=PATH)
        pyaflowa.setup(source_name=source_name, iteration=iteration, 
                       step_count=step_count, loc="*", cha="*")
        
        return pyaflowa

    def finalize(self):
        """
        Run some serial finalization tasks specific to Pyatoa, which will help
        aggregate the collection of output information.

        .. note::
            This finalize function performs the following tasks:
            * Generate .csv files using the Inspector
            * Aggregate event-specific PDFs into a single evaluation PDF
            * Save scratch/ data into output/ if requested
        """
        # Initiate Pyaflowa to get access to path structure
        pyaflowa = Pyaflowa(sfpar=PAR, sfpath=PATH)
        unix.cd(pyaflowa.paths.datasets)

        # Generate the Inspector from existing datasets and save to disk
        # Allow this to fail, which might happen if we don't have enough data
        # or the Dataset is not formatted as expected
        insp = Inspector(PAR.TITLE, verbose=False)
        try:
            insp.discover()
            insp.save()
        except Exception as e:
            self.logger.warning(f"Uncontrolled exception in Inspector creation "
                                f"will not create inspector:\n{e}")

        # Make the final PDF for easier User ingestion of waveform/map figures
        pyaflowa.make_evaluation_composite_pdf()

        # Move scratch/ directory results into more permanent storage
        if PAR.SAVE_DATASETS:
            datasets = glob(os.path.join(pyaflowa.paths.datasets, "*.h5"))
            self._save_quantity(datasets, tag="datasets")
        
        if PAR.SAVE_FIGURES:
            figures = glob(os.path.join(pyaflowa.paths.figures, "*.pdf"))
            self._save_quantity(figures, tag="figures")

        if PAR.SAVE_LOGS:
            logs = glob(os.path.join(pyaflowa.paths.logs, "*.txt"))
            path_out = os.path.join(PATH.WORKDIR, CFGPATHS.LOGDIR)
            self._save_quantity(logs, path_out=path_out)
    
    def _save_quantity(self, filepaths, tag="", path_out=""):
        """
        Repeatable convenience function to save quantities from the scratch/
        directory to the output/ directory

        :type filepaths: list
        :param filepaths: full path to files that should be saved to output/
        :type tag: str  
        :param tag: tag for saving the files in PATH.OUTPUT. If not given, will
            save directly into the output/ directory
        :type path_out: str
        :param path_out: overwrite the default output path file naming
        """       
        if not path_out:
            path_out = os.path.join(PATH.OUTPUT, tag)

        if not os.path.exists(path_out):
            unix.mkdir(path_out)

        for src in filepaths:
            dst = os.path.join(path_out, os.path.basename(src))
            unix.cp(src, dst) 

    def write_residuals(self, path, scaled_misfit):
        """
        Computes residuals and saves them to a text file in the appropriate path

        :type path: str        
        :param path: scratch directory path, e.g. PATH.GRAD or PATH.FUNC
        :type scaled_misfit: float
        :param scaled_misfit: the summation of misfit from each 
            source-receiver pair calculated by prepare_eval_grad()
        """
        residuals_file = os.path.join(path, "residuals")        
        np.savetxt(residuals_file, [scaled_misfit], fmt="%11.6e")

    def sum_residuals(self, files):
        """
        Averages the event misfits and returns the total misfit.
        Total misfit defined by Tape et al. (2010)

        :type files: list
        :param files: list of single-column text files containing residuals
            that will have been generated using prepare_eval_grad()
        :rtype: float
        :return: average misfit
        """
        if len(files) != PAR.NTASK:
            print(msg.cli(f"Pyatoa preprocessing module did not recover the "
                          f"correct number of residual files "
                          f"({len(files)}/{PAR.NTASK}). Please check that "
                          f"the preprocessing logs", header="error")
                  )
            sys.exit(-1)

        total_misfit = 0
        for filename in files:
            total_misfit += np.sum(np.loadtxt(filename))

        total_misfit /= PAR.NTASK

        return total_misfit
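
The check() assertion above is a simple length comparison: the padded data
gathering window (START_PAD + END_PAD) must be at least as long as the
synthetic trace (DT * NT). A quick worked example with made-up parameter
values, included only to illustrate the arithmetic:

# Hypothetical values, not defaults of SeisFlows3 or SPECFEM
DT, NT = 0.06, 10000           # solver time step [s] and number of samples
START_PAD, END_PAD = 20, 620   # seconds gathered before/after origin time

trace_length = DT * NT                 # 600 s of synthetics
gather_window = START_PAD + END_PAD    # 640 s of gathered data
assert trace_length <= gather_window   # passes: 600 <= 640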
示例#20
class NLCG(custom_import("optimize", "base")):
    """
    Nonlinear conjugate gradient method

    Optimization Variables:
        m: model
        f: objective function value
        g: gradient direction
        p: search direction

    Line Search Variables:
        x: list of step lengths from current line search
        f: corresponding list of function values
        m: number of step lengths in current line search
        n: number of model updates in optimization problem
        gtg: dot product of gradient with itself
        gtp: dot product of gradient and search direction

    Status codes:
        status > 0  : finished
        status == 0 : not finished
        status < 0  : failed
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized here for clarity and docstrings.

        :type NLCG_iter: int
        :param NLCG_iter: an internally used iteration count that differs from
            the optimization iteration. Keeps track of internal NLCG memory.
        """
        super().__init__()
        self.NLCG_iter = 0
        self.calc_beta = pollak_ribere  # !!! Allow the user to choose this fx?

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        # Define the Parameters required by this module
        sf.par("NLCGMAX",
               required=False,
               default="null",
               par_type=float,
               docstr="NLCG periodic restart interval, between 1 and inf")

        sf.par("NLCGTHRESH",
               required=False,
               default="null",
               par_type=float,
               docstr="NLCG conjugacy restart threshold, between 1 and inf")

        return sf

    def check(self, validate=True):
        """
        Checks parameters, paths, and dependencies
        """
        if validate:
            self.required.validate()
        super().check(validate=False)

        assert(PAR.LINESEARCH.upper() == "BRACKET"), \
            f"NLCG requires a bracketing line search algorithm"

    def compute_direction(self):
        """
        Compute search direction using the Nonlinear Conjugate Gradient method
        The potential outcomes when computing direction with NLCG

        1. First iteration of an NLCG optimization, search direction is
            the inverse gradient
        2. NLCG internal iteration ticks over the maximum allowable number of
            iterations, force a restart condition, search direction is the
            inverse gradient
        3. New NLCG search direction does not have conjugacy with previous
            search direction, force restart, inverse gradient search direction
        4. New NLCG search direction is not a descent direction,
            force restart, inverse gradient search direction
        5. New NLCG search direction has conjugacy and is a descent direction
            and is set as the new search direction.
        """
        self.logger.debug(f"computing search direction with NLCG")
        self.NLCG_iter += 1

        unix.cd(PATH.OPTIMIZE)

        # Load the current gradient direction
        g_new = self.load(self.g_new)

        # CASE 1: If first iteration, search direction is the current gradient
        if self.NLCG_iter == 1:
            self.logger.info("first NLCG iteration, setting search direction"
                             "as inverse gradient")
            p_new = -g_new
            restarted = 0
        # CASE 2: Force restart if the iterations have surpassed the maximum
        # number of allowable iter
        elif self.NLCG_iter > PAR.NLCGMAX:
            logger.info("restarting NLCG due to periodic restart condition. "
                        "setting search direction as inverse gradient")
            self.restart()
            p_new = -g_new
            restarted = 1
        # Normal NLCG direction computation
        else:
            # Compute search direction
            g_old = self.load(self.g_old)
            p_old = self.load(self.p_old)

            # Apply preconditioner and calc. scale factor for search dir. (beta)
            if self.precond:
                beta = self.calc_beta(g_new, g_old, self.precond)
                p_new = -self.precond(g_new) + beta * p_old
            else:
                beta = self.calc_beta(g_new, g_old)
                p_new = -g_new + beta * p_old

            # Check restart conditions, return search direction and status
            if check_conjugacy(g_new, g_old) > PAR.NLCGTHRESH:
                self.logger.info("restarting NLCG due to loss of conjugacy")
                self.restart()
                p_new = -g_new
                restarted = 1
            elif check_descent(p_new, g_new) > 0.:
                self.logger.info("restarting NLCG, not a descent direction")
                self.restart()
                p_new = -g_new
                restarted = 1
            else:
                restarted = 0

        # Save values to disk and memory
        self.save(self.p_new, p_new)
        self.restarted = restarted

    def restart(self):
        """
        Overwrite the Base restart class and include a restart of the NLCG
        """
        super().restart()
        self.NLCG_iter = 1
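The NLCG example above imports its beta and restart-check helpers (pollak_ribere, check_conjugacy, check_descent) from elsewhere in the package. The following is a minimal sketch of what those helpers compute, assuming the standard Polak-Ribiere update; it is written from the formulas, not copied from the SeisFlows source, so treat the signatures and scalings as assumptions.

import numpy as np

def pollak_ribere(g_new, g_old, precond=None):
    """Polak-Ribiere scale factor (beta) for the new search direction."""
    if precond is not None:
        num = np.dot(precond(g_new), g_new - g_old)
        den = np.dot(g_old, precond(g_old))
    else:
        num = np.dot(g_new, g_new - g_old)
        den = np.dot(g_old, g_old)
    return num / den

def check_conjugacy(g_new, g_old):
    """Normalized overlap of successive gradients; large values mean the
    directions have lost conjugacy and a restart is warranted."""
    return abs(np.dot(g_new, g_old)) / np.dot(g_new, g_new)

def check_descent(p_new, g_new):
    """Positive values mean p_new points uphill, i.e. it is not a descent
    direction, which also triggers a restart."""
    return np.dot(p_new, g_new) / np.dot(g_new, g_new)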
Example #21
class ThriftyInversion(custom_import("workflow", "inversion")):
    """
    Thrifty inversion which attempts to save resources by re-using previous
    line search results for the current iteration.
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        :type thrifty: bool
        :param thrifty: the current status of the inversion.
            if False: assumed to be first iteration, a restart, or some other
            condition has been met which means inversion is defaulting to normal
            behavior
            if True: A well-scaled inversion can skip the function evaluation
            of the next iteration by using the line search results of the
            previous iteration
        """
        super().__init__()
        self.thrifty = False

    def check(self, validate=True):
        """
        Checks parameters and paths
        """
        super().check(validate=False)
        if validate:
            self.required.validate()

        assert PAR.LINESEARCH == "Backtrack", \
            "Thrifty inversion requires backtracking line search"

    def initialize(self):
        """
        If the line search can be carried over, skip the initialization step.
        If manually starting a new run, start with the normal inversion init.
        """
        if not self.thrifty or optimize.iter == PAR.BEGIN:
            super().initialize()
        else:
            self.logger.info(msg.mjr("INITIALIZING THRIFTY INVERSION"))

    def clean(self):
        """
        Determine if forward simulation from line search can be carried over.
        We assume clean() is the final flow() argument so that we can update
        the thrifty status here.
        """
        self.update_status()

        if self.thrifty:
            self.logger.info(
                msg.mnr("THRIFTY CLEANING  WORKDIR FOR NEXT "
                        "ITERATION"))
            unix.rm(PATH.GRAD)
            unix.mv(PATH.FUNC, PATH.GRAD)
            unix.mkdir(PATH.FUNC)
        else:
            super().clean()

    def update_status(self):
        """
        Determine if line search forward simulation can be carried over based
        on a number of criteria
        """
        self.logger.info("updating thrifty inversion status")
        if optimize.iter == PAR.BEGIN:
            self.logger.info("1st iteration, defaulting to inversion workflow")
            thrifty = False
        elif optimize.restarted:
            self.logger.info("optimization has been restarted, defaulting to "
                             "inversion workflow")
            thrifty = False
        elif optimize.iter == PAR.END:
            self.logger.info(
                "final iteration, defaulting to inversion workflow")
            thrifty = False
        else:
            self.logger.info("continuing with thrifty inversion workflow")
            thrifty = True

        self.thrifty = thrifty
Example #22
class Workstation(custom_import("system", "base")):
    """
    Run tasks in a serial fashion on a single local machine
    """
    logger = logging.getLogger(__name__).getChild(__qualname__)

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        sf.par("MPIEXEC",
               required=False,
               default=None,
               par_type=str,
               docstr="Function used to invoke executables on the system. "
               "For example 'srun' on SLURM systems, or './' on a "
               "workstation. If left blank, will guess based on the "
               "system.")

        sf.par("NTASK",
               required=False,
               default=1,
               par_type=int,
               docstr="Number of separate, individual tasks. Also equal to "
               "the number of desired sources in workflow")

        sf.par("NPROC",
               required=False,
               default=1,
               par_type=int,
               docstr="Number of processor to use for each simulation")

        return sf

    def check(self, validate=True):
        """
        Checks parameters and paths
        """
        super().check(validate=False)
        if validate:
            self.required.validate()

    def submit(self):
        """
        Submits the main workflow job
        """
        self.setup()
        workflow = sys.modules["seisflows_workflow"]
        workflow.checkpoint()
        workflow.main()

    def run(self, classname, method, single=False, **kwargs):
        """
        Executes task multiple times in serial.

        .. note::
            kwargs will be passed to the underlying `method` that is called

        :type classname: str
        :param classname: the class to run
        :type method: str
        :param method: the method from the given `classname` to run
        :type single: bool
        :param single: run a single-process, non-parallel task, such as
            smoothing the gradient, which only needs to be run once.
            This will change how the job array and the number of tasks is
            defined, such that the job is submitted as a single-core job to
            the system.
        """
        self.checkpoint(PATH.OUTPUT, classname, method, kwargs)

        # Allows dynamic retrieval of any function from within package, e.g.,
        # <bound method Base.eval_func of <seisflows3.solver.specfem2d...
        class_module = sys.modules[f"seisflows_{classname}"]
        function = getattr(class_module, method)

        if single:
            ntasks = 1
        else:
            ntasks = PAR.NTASK

        for taskid in range(ntasks):
            # os environment variables can only be strings, these need to be
            # converted back to integers by system.taskid()
            os.environ["SEISFLOWS_TASKID"] = str(taskid)
            if taskid == 0:
                self.logger.info(f"running task {classname}.{method} "
                                 f"{PAR.NTASK} times")
            function(**kwargs)

    def taskid(self):
        """
        Provides a unique identifier for each running task, which should be set
        by the 'run' command.

        :rtype: int
        :return: returns the os environment variable SEISFLOWS_TASKID which is
            set by run() to label each of the currently
            running processes on the SYSTEM.
        """
        sftaskid = os.getenv("SEISFLOWS_TASKID")
        if sftaskid is None:
            print(
                msg.cli(
                    "system.taskid() environment variable not found. "
                    "Assuming DEBUG mode and returning taskid==0. "
                    "If not DEBUG mode, please check SYSTEM.run()",
                    header="warning",
                    border="="))
            sftaskid = 0
        return int(sftaskid)
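The serial run() loop above passes the task index to the called function through the SEISFLOWS_TASKID environment variable, and taskid() reads it back. A self-contained sketch of that pattern follows; eval_func here is a hypothetical stand-in, not the SeisFlows solver method.

import os

def taskid():
    """Mimics system.taskid(): recover the index set by the run loop."""
    return int(os.getenv("SEISFLOWS_TASKID", 0))

def eval_func():
    """Hypothetical stand-in for a per-source task."""
    print(f"running as task {taskid()}")

ntask = 3
for i in range(ntask):
    # environment variables must be strings; taskid() converts back to int
    os.environ["SEISFLOWS_TASKID"] = str(i)
    eval_func()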
Example #23
class Chinook(custom_import("system", "slurm")):
    """
    System interface for the University of Alaska HPC Chinook, which operates
    on a SLURM system.
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized as NoneTypes for clarity and docstrings.

        :type partitions: dict
        :param partitions: Chinook has various partitions which each have their
            own number of cores per compute node, defined here
        """
        super().__init__()
        self.partitions = {
            "debug": 24,
            "t1small": 28,
            "t2small": 28,
            "t1standard": 40,
            "t2standard": 40,
            "analysis": 28
        }

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        sf.par("PARTITION",
               required=False,
               default="t1small",
               par_type=str,
               docstr="Name of partition on main cluster, available: "
               "analysis, t1small, t2small, t1standard, t2standard, gpu")

        sf.par("MPIEXEC",
               required=False,
               default="srun",
               par_type=str,
               docstr="Function used to invoke parallel executables")

        return sf

    def check(self, validate=True):
        """
        Checks parameters and paths
        """
        if validate:
            self.required.validate()
        super().check(validate=False)

        assert(PAR.PARTITION in self.partitions.keys()), \
            f"Chinook partition must be in {self.partitions.keys()}"

        assert(PAR.NODESIZE == self.partitions[PAR.PARTITION]), \
            (f"PARTITION {PAR.PARTITION} is expected to have NODESIZE="
             f"{self.partitions[PAR.PARTITION]}, not current {PAR.NODESIZE}")
Example #24
class Migration(custom_import("workflow", "base")):
    """
    Migration base class.

    Performs the workflow of an inversion up to the postprocessing. In the
    terminology of seismic exploration, implements a 'reverse time migration'.
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized as NoneTypes for clarity and docstrings.

        """
        super().__init__()

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        return sf

    def main(self, return_flow=False):
        """s
        Migrates seismic data to generate sensitivity kernels

        :type return_flow: bool
        :param return_flow: for CLI tool, simply returns the flow function
            rather than running the workflow. Used for print statements etc.
        """
        flow = (
            self.setup,
            self.generate_synthetics,
            self.backproject,
            self.process_kernels,
            self.finalize,
        )
        if return_flow:
            return flow

        # Allow workflow resume from and stop after given flow functions
        start, stop = self.check_stop_resume_cond(flow)

        # Run each argument in flow
        self.logger.info(msg.mjr("STARTING MIGRATION WORKFLOW"))
        for func in flow[start:stop]:
            func()
        self.logger.info(msg.mjr("FINISHED MIGRATION WORKFLOW"))

    def setup(self):
        """
        Sets up the SeisFlows3 modules for the Migration
        """
        # Set up all the requisite modules from the master job
        self.logger.info(msg.mnr("PERFORMING MODULE SETUP"))
        preprocess.setup()
        postprocess.setup()
        system.run("solver", "setup")

    def generate_synthetics(self):
        """
        Performs forward simulation, and evaluates the objective function
        """
        self.logger.info(msg.sub("PREPARING VELOCITY MODEL"))
        src = os.path.join(PATH.OUTPUT, "model_init")
        dst = os.path.join(PATH.SCRATCH, "model")

        assert os.path.exists(src)
        unix.cp(src, dst)

        self.logger.info(msg.sub("EVALUATE OBJECTIVE FUNCTION"))
        system.run("solver",
                   "eval_func",
                   path=PATH.SCRATCH,
                   write_residuals=True)

    def backproject(self):
        """
        Backproject or create kernels by running adjoint simulations
        """
        self.logger.info(msg.sub("BACKPROJECT / EVALUATE GRADIENT"))
        system.run("solver",
                   "eval_grad",
                   path=PATH.SCRATCH,
                   export_traces=PAR.SAVETRACES)

    def process_kernels(self):
        """
        Backproject to create kernels from synthetics
        """
        system.run("postprocess",
                   "process_kernels",
                   single=True,
                   path=os.path.join(PATH.SCRATCH, "kernels"),
                   parameters=solver.parameters)

        try:
            # TODO Figure out a better method for running this try except
            system.run("postprocess",
                       "process_kernels",
                       single=True,
                       path=os.path.join(PATH.SCRATCH, "kernels"),
                       parameters=["rhop"])
        except Exception:
            pass

    def finalize(self):
        """
        Saves results from current model update iteration
        """
        self.logger.info(msg.mnr("FINALIZING MIGRATION WORKFLOW"))

        if PAR.SAVETRACES:
            self.save_traces()
        if PAR.SAVEKERNELS:
            self.save_kernels()
        else:
            self.save_kernels_sum()

    def save_kernels_sum(self):
        """
        Save summed kernels into the output directory
        """
        src = os.path.join(PATH.SCRATCH, "kernels", "sum")
        dst = os.path.join(PATH.OUTPUT, "kernels")
        unix.mkdir(dst)
        unix.cp(src, dst)

    def save_kernels(self):
        """
        Save individual kernels into the output directory
        """
        src = os.path.join(PATH.SCRATCH, "kernels")
        dst = PATH.OUTPUT
        unix.mkdir(dst)
        unix.cp(src, dst)

    def save_traces(self):
        """
        Save waveform traces into the output directory
        """
        src = os.path.join(PATH.SCRATCH, "traces")
        dst = PATH.OUTPUT
        unix.cp(src, dst)
Example #25
class ThriftyMaui(custom_import("workflow", "thrifty_inversion")):
    """
    Thrifty waveform inversion class specifically for running jobs on the
    New Zealand HPC cluster Maui.

    On Maui, Anaconda is only available on an ancillary cluster, Maui_ancil,
    so jobs involving the preprocessing module must be called through a
    separate system run call.
    """
    def check(self):
        """
        Ensure that the correct submodules are specified, otherwise
        this workflow won't function properly.
        """
        super().check()

        if "MAUI" not in PAR.SYSTEM.upper():
            raise ParameterError()

        if "MAUI" not in PAR.SOLVER.upper():
            raise ParameterError()

    def setup(self):
        """
        Lays the groundwork for the inversion by running setup() for the
        involved sub-modules, generating synthetic 'true' data if necessary,
        and generating the pre-requisite database files. Should only be run
        once, at iteration 1.
        """
        # Set up all the requisite modules
        print("SETUP")
        preprocess.setup()
        postprocess.setup()
        optimize.setup()

        # Run the setup in serial to reduce unnecessary job submissions
        # Needs to be split up into multiple system calls
        solver.initialize_solver_directories()

        if PAR.CASE.upper() == "SYNTHETIC":
            system.run("solver", "setup", single=True, model="true")
            system.run("solver", "generate_data")

        system.run("solver", "setup", single=True, model="init")

    def evaluate_function(self, path, suffix):
        """
        Performs forward simulation, and evaluates the objective function.

        Differs from Inversion.evaluate_function() as it splits the forward
        problem and misfit quantification into two separate system calls,
        rather than a single system call.

        :type path: str
        :param path: path in the scratch directory to use for I/O
        :type suffix: str
        :param suffix: suffix to use for I/O
        """
        print("EVALUATE FUNCTION\n\tRunning forward simulation")
        self.write_model(path=path, suffix=suffix)
        system.run("solver", "eval_fwd", path=path)
        print("\tEvaluating misfit")
        system.run_ancil("solver", "eval_misfit", path=path)
        self.write_misfit(path=path, suffix=suffix)
Example #26
class Specfem3DGlobe(custom_import("solver", "specfem3d")):
    """
    Python interface to Specfem3D Globe. This subclass inherits functions
    from seisflows3.solver.specfem3d.Specfem3D

    !!! See base class for method descriptions !!!
    """
    # Class-specific logger accessed using self.logger
    logger = logging.getLogger(__name__).getChild(__qualname__)

    def __init__(self):
        """
        These parameters should not be set by the user.
        Attributes are initialized as NoneTypes for clarity and docstrings.

        :type logger: Logger
        :param logger: Class-specific logging module, log statements pushed
            from this logger will be tagged by its specific module/classname
        """
        super().__init__()

    @property
    def required(self):
        """
        A hard definition of paths and parameters required by this class,
        alongside their necessity for the class and their string explanations.
        """
        sf = SeisFlowsPathsParameters(super().required)

        return sf

    def load(self, path, prefix="reg1_", suffix="", parameters=None):
        """
        Reads SPECFEM model or kernel

        Models are stored in Fortran binary format and separated into
        multiple files according to material parameter and processor rank.

        :type path: str
        :param path: directory from which model is read
        :type prefix: str
        :param prefix: optional filename prefix
        :type suffix: str
        :param suffix: optional filename suffix, eg '_kernel'
        :type parameters: list
        :param parameters: material parameters to be read
            (if empty, defaults to self.parameters)
        :rtype: dict
        :return: model or kernels indexed by material parameter and
            processor rank, ie dict[parameter][iproc]
        """
        parameters = parameters or self.parameters

        model = Model(parameters)
        minmax = Minmax(parameters)

        for iproc in range(self.mesh_properties.nproc):
            # read database files based on parameters
            keys, vals = loadbypar(path, self.parameters, iproc, prefix,
                                   suffix)
            for key, val in zip(keys, vals):
                model[key] += [val]

            minmax.update(keys, vals)

        return model

    def save(self, path, model, prefix="reg1_", suffix=""):
        """
        Writes SPECFEM3D_GLOBE transversely isotropic model

        :type path: str
        :param path: directory to which the model is written
        :type model: dict
        :param model: model or kernels to be written, indexed by material
            parameter and processor rank
        :type prefix: str
        :param prefix: prefix that begins the name of the model parameters
        :type suffix: str
        :param suffix: suffix that follows the names of the model parameters
        """
        unix.mkdir(path)

        for iproc in range(self.mesh_properties.nproc):
            for key in ["vpv", "vph", "vsv", "vsh", "eta"]:
                if key in self.parameters:
                    savebin(model[key][iproc], path, iproc,
                            prefix + key + suffix)
                elif 'kernel' in suffix:
                    pass
                else:
                    src = PATH.OUTPUT + '/' + 'model_init'
                    dst = path
                    copybin(src, dst, iproc, prefix + key + suffix)

            if 'rho' in self.parameters:
                savebin(model['rho'][iproc], path, iproc,
                        prefix + 'rho' + suffix)
            elif 'kernel' in suffix:
                pass
            else:
                src = PATH.OUTPUT + '/' + 'model_init'
                dst = path
                copybin(src, dst, iproc, prefix + 'rho' + suffix)

    def check_mesh_properties(self, path=None, parameters=None):
        """
        Determine mesh properties (number of processes and GLL points per
        process) by reading database files from the given path

        :type path: str
        :param path: path to the mesh/model files; defaults to PATH.MODEL_INIT
        :type parameters: list
        :param parameters: material parameters to check; defaults to
            self.parameters
        """
        if not hasattr(self, '_mesh_properties'):
            if path is None:
                path = PATH.MODEL_INIT

            if parameters is None:
                parameters = self.parameters

            nproc = 0
            ngll = []
            while True:
                dummy = loadbin(path, nproc, 'reg1_' + parameters[0])
                ngll += [len(dummy)]
                nproc += 1
                if not exists(
                        os.path.join(path,
                                     f"proc{nrpoc}_reg1_{parameters[0]}.bin")):
                    break

            self._mesh_properties = Struct([['nproc', nproc], ['ngll', ngll]])

    def rename_data(self):
        """
        Works around conflicting data filename conventions

        Specfem3D uses different naming conventions for regular traces
        and 'adjoint' traces
        """
        files = glob(os.path.join(self.cwd, "traces", "adj", "*sem.ascii"))
        unix.rename("sem.ascii", "sem.ascii.adj", files)

    def initialize_adjoint_traces(self):
        """
        Setup utility: Creates the "adjoint traces" expected by SPECFEM

        !!! This probably doesn't work

        Note:
            Adjoint traces are initialized by writing zeros for all channels.
            Channels actually in use during an inversion or migration will be
            overwritten with nonzero values later on.
        """
        super().initialize_adjoint_traces()

        # workaround for SPECFEM's use of different name conventions for
        # regular traces and 'adjoint' traces
        if PAR.FORMAT.upper() == "ASCII":
            files = glob(os.path.join(self.cwd, "traces", "adj", "*sem.ascii"))
            unix.rename("sem.ascii", "adj", files)

    @property
    def data_wildcard(self):
        """
        Returns a wildcard identifier for synthetic data

        :rtype: str
        :return: wildcard identifier for channels
        """
        if PAR.FORMAT.upper() == "ASCII":
            return f"*.?X?.sem.ascii"

    @property
    def data_filenames(self):
        """
        Returns the filenames of all data, either by the requested components
        or by all available files in the directory.

        :rtype: list
        :return: list of data filenames
        """
        unix.cd(os.path.join(self.cwd, "traces", "obs"))

        if PAR.FORMAT.upper() == "ASCII":
            return sorted(glob("*.???.sem.ascii"))
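The load()/save() methods above ultimately read and write SPECFEM's per-process Fortran binary files through loadbin/savebin/loadbypar. Below is a minimal sketch of that I/O, assuming the usual SPECFEM layout (a 4-byte record-length marker around float32 data and zero-padded 'procNNNNNN_<name>.bin' filenames); it is an illustration of the format, not the SeisFlows implementation.

import os
import numpy as np

def loadbin(path, iproc, name):
    """Read one proc??????_<name>.bin file (Fortran unformatted record)."""
    fname = os.path.join(path, f"proc{iproc:06d}_{name}.bin")
    with open(fname, "rb") as f:
        # leading record marker gives the payload size in bytes
        nbytes = int(np.fromfile(f, dtype="int32", count=1)[0])
        return np.fromfile(f, dtype="float32", count=nbytes // 4)

def savebin(data, path, iproc, name):
    """Write data back out with matching leading/trailing record markers."""
    data = np.asarray(data, dtype="float32")
    marker = np.array([4 * data.size], dtype="int32")
    with open(os.path.join(path, f"proc{iproc:06d}_{name}.bin"), "wb") as f:
        marker.tofile(f)
        data.tofile(f)
        marker.tofile(f)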