Example #1
 def main(self):
     """
     Set up the example and then optionally run the actual seisflows workflow
     """
     print(msg.cli("EXAMPLE SETUP", border="="))
     # Step 1: Download and configure SPECFEM2D, make binaries. Optional
     self.download_specfem2d()
     self.configure_specfem2d_and_make_binaries()
     # Step 2: Create a working directory and generate initial/final models
     self.create_specfem2d_working_directory()
     # Step 2a: Generate MODEL_INIT, rearrange resulting directory structure
     print(msg.cli("GENERATING INITIAL MODEL", border="="))
     self.setup_specfem2d_for_model_init()
     self.run_xspecfem2d_binaries()
     self.cleanup_xspecfem2d_run(choice="INIT")
     # Step 2b: Generate MODEL_TRUE, rearrange resulting directory structure
     print(msg.cli("GENERATING TRUE/TARGET MODEL", border="="))
     self.setup_specfem2d_for_model_true()
     self.run_xspecfem2d_binaries()
     self.cleanup_xspecfem2d_run(choice="TRUE")
     # Step 3: Set up the SeisFlows3 working directory and finalize Par_file
     self.setup_seisflows_working_directory()
     self.finalize_specfem2d_par_file()
     print(msg.cli("COMPLETE EXAMPLE SETUP", border="="))
     # Step 4: Run the workflow
     if self.run_example:
         print(msg.cli("RUNNING SEISFLOWS3 INVERSION WORKFLOW", border="="))
         self.run_sf3_example()
Example #2
    def __init__(self, ntask=3, niter=2):
        """
        Set path structure which is used to navigate around SPECFEM repositories
        and the example working directory

        :type ntask: int
        :param ntask: number of events to use in inversion, between 1 and 25.
            defaults to 3
        :type niter: int
        :param niter: number of iterations to run. defaults to 2
        """
        specfem2d_repo = input(
            msg.cli("If you have already downloaded SPECMFE2D, please input "
                    "the full path to the repo. If left blank, this example "
                    "will pull the latest version from GitHub and attempt "
                    "to configure and make the binaries:\n> "))

        self.cwd = os.getcwd()
        self.sem2d_paths, self.workdir_paths = self.define_dir_structures(
            cwd=self.cwd, specfem2d_repo=specfem2d_repo)
        self.ntask = ntask
        assert(1 <= self.ntask <= 25), \
            f"number of tasks/events must be between 1 and 25, not {self.ntask}"
        self.niter = niter
        assert(1 <= self.niter <= np.inf), \
            f"number of iterations must be between 1 and inf, not {self.niter}"

        # This bool information is provided by the User running 'setup' or 'run'
        self.run_example = bool(sys.argv[1] == "run")

        # Command line tool to use $ seisflows <cmd> from inside Python
        # Zero out sys.argv to ensure that no arguments are given to the CLI
        sys.argv = [sys.argv[0]]
        self.sf = SeisFlows()
Example #3
File: base.py Project: bch0w/seisflows
    def export_residuals(self, path):
        """
        File transfer utility. Export residuals to disk.

        :type path: str
        :param path: path to save residuals
        """
        if self.taskid == 0:
            self.logger.debug(f"exporting residuals to:\n{path}")

        unix.mkdir(os.path.join(path, "residuals"))
        src = os.path.join(self.cwd, "residuals")

        # If this residuals directory has not been created, something
        # has gone wrong with the preprocessing and workflow cannot proceed
        if not os.path.exists(src):
            print(
                msg.cli(
                    "The Solver function 'export_residuals' expected "
                    "'residuals' directories to be created but could not "
                    "find them and cannot continue the workflow. Please "
                    "check the preprocess.prepare_eval_grad() function",
                    header="preprocess error",
                    border="="))
            sys.exit(-1)

        dst = os.path.join(path, "residuals", self.source_name)
        unix.mv(src, dst)
Example #4
def check_poissons_ratio(vp, vs, min_val=-1., max_val=0.5):
    """
    Check Poisson's ratio based on Vp and Vs model vectors. Exit SeisFlows3 if
    Poisson's ratio is outside the range (`min_val`, `max_val`), which by
    default is set internally by SPECFEM. Otherwise return the values.

    :type vp: np.array
    :param vp: P-wave velocity model vector
    :type vs: np.array
    :param vs: S-wave velocity model vector
    :type min_val: float
    :param min_val: minimum model-wide acceptable value for poissons ratio
    :type max_val: float
    :param max_val: maximum model-wide acceptable value for poissons ratio
    :rtype: np.array
    :return: Poisson's ratio values computed from Vp and Vs
    """
    poissons = poissons_ratio(vp=vp, vs=vs)
    pmin = poissons.min()
    pmax = poissons.max()
    if (pmin < min_val) or (pmax > max_val):
        print(
            msg.cli(
                f"The Poisson's ratio of the given model is out of "
                f"bounds with respect to the defined range "
                f"({min_val}, {max_val}). "
                f"The model bounds were found to be:",
                items=[f"{pmin:.2f} < PR < {pmax:.2f}"],
                border="=",
                header="Poisson's Ratio Error"))
        sys.exit(-1)
    return poissons
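
For reference, `poissons_ratio` itself is not shown in these examples. A minimal sketch consistent with the usage above, assuming the standard isotropic elastic relation between Vp and Vs, could be:

import numpy as np

def poissons_ratio(vp, vs):
    """
    Poisson's ratio from P- and S-wave velocity vectors (sketch only, not
    the packaged function): nu = (vp^2 - 2*vs^2) / (2 * (vp^2 - vs^2))
    """
    vp = np.asarray(vp, dtype=float)
    vs = np.asarray(vs, dtype=float)
    return (vp ** 2 - 2 * vs ** 2) / (2 * (vp ** 2 - vs ** 2))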
Example #5
File: base.py Project: bch0w/seisflows
    def check_source_names(self):
        """
        Determines names of sources by applying wildcard rule to
        user-supplied input files

        .. note::
            Source list is sorted and collected from start up to PAR.NTASK
        """
        # Apply wildcard rule and check for available sources, exit if no
        # sources found because then we can't proceed
        wildcard = f"{self.source_prefix}_*"
        fids = sorted(glob(os.path.join(PATH.SPECFEM_DATA, wildcard)))
        if not fids:
            print(
                msg.cli(
                    "No matching source files when searching PATH for"
                    "the given WILDCARD",
                    items=[
                        f"PATH: {PATH.SPECFEM_DATA}", f"WILDCARD: {wildcard}"
                    ],
                    header="error"))
            sys.exit(-1)

        # Create internal definition of source names by stripping prefixes
        names = [os.path.basename(fid).split("_")[-1] for fid in fids]
        self._source_names = names[:PAR.NTASK]
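
For illustration, the wildcard rule and prefix stripping behave as follows (hypothetical prefix and file list):

import os

source_prefix = "CMTSOLUTION"                   # hypothetical prefix
wildcard = f"{source_prefix}_*"                 # -> "CMTSOLUTION_*"
fids = ["CMTSOLUTION_001", "CMTSOLUTION_002"]   # files matched by the glob
names = [os.path.basename(fid).split("_")[-1] for fid in fids]
# names == ["001", "002"], then truncated to the first PAR.NTASK entries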
Example #6
File: lsf.py Project: bch0w/seisflows
    def job_status(self, classname, method, jobs):
        """
        Queries completion status of a single job

        :type job: str
        :param job: job id to query
        """
        job_finished = []
        for job in jobs:
            state = self._query(job)
            if state == "DONE":
                job_finished.append(True)
            else:
                job_finished.append(False)

            if state == "EXIT":
                print(
                    msg.cli(
                        f"LSF job {job} failed to execute "
                        f"{classname}.{method}.",
                        header="error",
                        border="="))
                sys.exit(-1)

        isdone = all(job_finished)

        return isdone, jobs
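
The `_query` helper is defined elsewhere in lsf.py. A rough sketch that would be consistent with the DONE/EXIT states checked above, assuming classic `bjobs` output where STAT is the third column, might be:

import subprocess

def _query(job_id):
    """
    Return the LSF state string for one job, e.g., 'DONE', 'EXIT', 'RUN'.
    Sketch only: parses the STAT column of `bjobs <job_id>` output.
    """
    stdout = subprocess.run(f"bjobs {job_id}", shell=True, text=True,
                            stdout=subprocess.PIPE).stdout
    lines = stdout.strip().splitlines()
    if len(lines) < 2:          # no job line returned, state unknown
        return "UNDEFINED"
    return lines[1].split()[2]  # columns: JOBID USER STAT QUEUE ...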
Example #7
    def sum_residuals(self, files):
        """
        Averages the event misfits and returns the total misfit.
        Total misfit defined by Tape et al. (2010)

        :type files: list of str
        :param files: single-column text files containing residuals
            generated by prepare_eval_grad()
        :rtype: float
        :return: average misfit
        """
        if len(files) != PAR.NTASK:
            print(msg.cli(f"Pyatoa preprocessing module did not recover the "
                          f"correct number of residual files "
                          f"({len(files)}/{PAR.NTASK}). Please check that "
                          f"the preprocessing logs", header="error")
                  )
            sys.exit(-1)

        total_misfit = 0
        for filename in files:
            total_misfit += np.sum(np.loadtxt(filename))

        total_misfit /= PAR.NTASK

        return total_misfit
Example #8
    def __init__(self, ntask=2, niter=1, nsta=5):
        """
        Overload init and attempt to import Pyatoa before running example,
        overload the default number of tasks to 2, and add a new init parameter
        `nsta` which chooses the number of stations, between 1 and 131

        :type ntask: int
        :param ntask: number of events to use in inversion, between 1 and 25.
            defaults to 2
        :type niter: int
        :param niter: number of iterations to run. defaults to 1
        :type nsta: int
        :param nsta: number of stations to include in inversion, between 1 and
            131
        """
        super().__init__(ntask=ntask, niter=niter)
        self.nsta = nsta
        # Bounds are expressed as a station count (1-based), not a list index
        assert(1 <= self.nsta <= 131), \
            f"number of stations must be between 1 and 131, not {self.nsta}"
        # Make sure that Pyatoa has been installed before running
        try:
            import pyatoa
        except ModuleNotFoundError:
            print(
                msg.cli(
                    "Module Pyatoa not found but is required for this "
                    "example. Please install Pyatoa and rerun this "
                    "example.",
                    header="module not found error",
                    border="="))
            sys.exit(-1)
Example #9
    def check_solver_parameter_files(self):
        """
        Checks SPECFEM3D Par_file parameters and matches them against the
        internally set SeisFlows3 parameters
        """
        # Check the number of steps in the SPECFEM2D Par_file
        nt_str, nt, nt_i = getpar(key="NSTEP", file="DATA/Par_file")
        if int(nt) != PAR.NT:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM3D {nt_str}=={nt} is not equal "
                        f"SeisFlows3 PAR.NT=={PAR.NT}. Please ensure "
                        f"that these values match in both files.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        dt_str, dt, dt_i = getpar(key="DT", file="DATA/Par_file")
        if float(dt) != PAR.DT:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM3D {dt_str}=={dt} is not equal "
                        f"SeisFlows3 PAR.DT=={PAR.DT}. Please ensure "
                        f"that these values match in both files.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        # Ensure that NPROC matches the MESH values
        nproc = self.mesh_properties.nproc
        if nproc != PAR.NPROC:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM3D mesh NPROC=={nproc} is not equal "
                        f"SeisFlows3 PAR.NPROC=={PAR.NPROC}. "
                        f"Please check that your mesh matches this val.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        if "MULTIPLES" in PAR:
            raise NotImplementedError
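
`getpar` is a SeisFlows3 utility for reading SPECFEM 'KEY = value' parameter files. Judging from the calls above, it returns the key as written, its value, and the line index; a rough sketch under that assumption:

def getpar(key, file):
    """
    Fetch a parameter from a SPECFEM-style 'KEY = value' file (sketch only,
    assumed behavior). Returns (key_as_written, value, line_index) for the
    first matching line, ignoring '#' comments.
    """
    with open(file, "r") as f:
        for i, line in enumerate(f):
            if line.strip().startswith("#") or "=" not in line:
                continue
            k, v = line.split("=", 1)
            if k.strip() == key:
                return k.strip(), v.split("#")[0].strip(), i
    raise KeyError(f"parameter '{key}' not found in {file}")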
Example #10
    def prepare_eval_grad(self, cwd, source_name, taskid, **kwargs):
        """
        Prepare the gradient evaluation by gathering and preprocessing
        waveforms, and measuring misfit between observations and synthetics
        using Pyatoa.

        Reads in observed and synthetic waveforms, applies optional
        preprocessing, assesses misfit, and writes out adjoint sources and
        STATIONS_ADJOINT file.

        .. note::
            Meant to be called by solver.eval_func(), may have unused arguments
            to keep functions general across preprocessing subclasses.

        :type cwd: str
        :param cwd: current specfem working directory containing observed and
            synthetic seismic data to be read and processed. Should be defined
            by solver.cwd
        :type source_name: str
        :param source_name: the event id to be used for tagging and data lookup.
            Should be defined by solver.source_name
        :type taskid: int
        :param taskid: identifier of the currently running solver instance.
            Should be defined by solver.taskid
        :type filenames: list of str
        :param filenames: [not used] list of filenames defining the files in
            traces
        """
        if taskid == 0:
            self.logger.debug("preparing files for gradient evaluation with "
                              "Pyaflowa")

        # Process all the stations for a given event using Pyaflowa
        pyaflowa = self.setup_event_pyaflowa(source_name)
        scaled_misfit = pyaflowa.process()

        if scaled_misfit is None:
            print(msg.cli(f"Event {source_name} returned no misfit, you may "
                          f"want to check logs and waveform figures, "
                          f"or consider discarding this event from your "
                          f"workflow", 
                          items=[pyaflowa.paths.logs, pyaflowa.paths.figures],
                          header="pyatoa preprocessing error", border="="))
            sys.exit(-1)

        # Event misfit defined by Tape et al. (2010) written to solver dir.
        self.write_residuals(path=cwd, scaled_misfit=scaled_misfit)
Example #11
File: base.py Project: bch0w/seisflows
    def process_kernels(path, logger):
        """
        Sums kernels from individual sources, with optional smoothing

        .. note::
            This function needs to be run on system, i.e., called by
            system.run(single=True)

        :type path: str
        :param path: directory containing sensitivity kernels in the scratch
            directory
        :type logger: Logger
        :param logger: Class-specific logging module, log statements pushed
            from this logger will be tagged by its specific module/classname
        """
        if not os.path.exists(path):
            print(
                msg.cli(
                    "Gradient path in postprocess.process_kernels "
                    "does not exist but should",
                    items=[path],
                    header="error"))
            sys.exit(-1)

        # If specified, smooth the kernels in the vertical and horizontal
        path_sum_nosmooth = os.path.join(path, "sum_nosmooth")
        path_sum = os.path.join(path, "sum")

        if (PAR.SMOOTH_H > 0) or (PAR.SMOOTH_V > 0):
            logger.debug(f"saving unsmoothed and summed kernels to:\n"
                         f"{path_sum_nosmooth}")
            solver.combine(input_path=path, output_path=path_sum_nosmooth)

            logger.info(f"smoothing gradient: H={PAR.SMOOTH_H}m, "
                        f"V={PAR.SMOOTH_V}m")
            logger.debug(f"saving smoothed kernels to:\n{path_sum}")
            solver.smooth(input_path=path_sum_nosmooth,
                          output_path=path_sum,
                          span_h=PAR.SMOOTH_H,
                          span_v=PAR.SMOOTH_V)

        # Combine all the input kernels, generating the unscaled gradient
        else:
            logger.debug(f"saving summed kernels to:\n{path_sum}")
            solver.combine(input_path=path, output_path=path_sum)
Example #12
File: base.py Project: bch0w/seisflows
    def check_mesh_properties(self, path=None):
        """
        Determine if Mesh properties are okay for workflow

        :type path: str
        :param path: path to the mesh file
        """
        # Check the given model path or the initial model
        if path is None:
            path = PATH.MODEL_INIT

        if not exists(path):
            print(
                msg.cli(f"The following mesh path does not exist but should",
                        items=[path],
                        header="solver error",
                        border="="))
            sys.exit(-1)

        # Count slices and grid points
        key = self.parameters[0]
        iproc = 0
        ngll = []
        while True:
            dummy = self.io.read_slice(path=path, parameters=key,
                                       iproc=iproc)[0]
            ngll += [len(dummy)]
            iproc += 1
            if not exists(os.path.join(path,
                                       f"proc{int(iproc):06d}_{key}.bin")):
                break
        nproc = iproc

        # Create coordinate pointers
        # !!! This partial is incorrectly defined and does not execute when
        # !!! called. What is the point of that?
        coords = Struct()
        for key in ['x', 'y', 'z']:
            coords[key] = partial(self.io.read_slice, self, path, key)

        # Define internal mesh properties
        self._mesh_properties = Struct([["nproc", nproc], ["ngll", ngll],
                                        ["path", path], ["coords", coords]])
Example #13
    def data_filenames(self):
        """
        Returns the filenames of all data, either by the requested components
        or by all available files in the directory.

        .. note::
            If the glob returns an empty list, this function exits the
            workflow because filenames should not be empty if they're being
            queried

        :rtype: list
        :return: list of data filenames
        """
        unix.cd(self.cwd)
        unix.cd(os.path.join("traces", "obs"))

        if PAR.COMPONENTS:
            filenames = []
            if PAR.FORMAT.upper() == "SU":
                for comp in PAR.COMPONENTS:
                    filenames += [self.data_wildcard.format(comp=comp.lower())]
                    # filenames += [f"U{comp.lower()}_file_single.su"]
            elif PAR.FORMAT.upper() == "ASCII":
                for comp in PAR.COMPONENTS:
                    filenames += glob(
                        self.data_wildcard.format(comp=comp.upper()))
                    # filenames += glob(f"*.?X{comp.upper()}.sem?")
        else:
            filenames = glob(self.data_wildcard)

        if not filenames:
            print(
                msg.cli(
                    "The property solver.data_filenames, used to search "
                    "for traces in 'scratch/solver/*/traces' is empty "
                    "and should not be. Please check solver parameters: ",
                    items=[f"data_wildcard: {self.data_wildcard}"],
                    header="data filenames error",
                    border="="))
            sys.exit(-1)

        return filenames
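
The `data_wildcard` property consulted above is not shown; the commented-out patterns suggest something like the following sketch, keyed on PAR.FORMAT:

    @property
    def data_wildcard(self):
        """
        Format-dependent glob template for trace files; '{comp}' is filled in
        per component by the caller. Sketch based on the commented-out
        patterns in data_filenames().
        """
        if PAR.FORMAT.upper() == "SU":
            return "U{comp}_file_single.su"
        elif PAR.FORMAT.upper() == "ASCII":
            return "*.?X{comp}.sem?"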
Example #14
    def taskid(self):
        """
        Provides a unique identifier for each running task, which should be set
        by the 'run' command.

        :rtype: int
        :return: returns the os environment variable SEISFLOWS_TASKID which is
            set by run() to label each of the currently
            running processes on the SYSTEM.
        """
        sftaskid = os.getenv("SEISFLOWS_TASKID")
        if sftaskid is None:
            print(
                msg.cli(
                    "system.taskid() environment variable not found. "
                    "Assuming DEBUG mode and returning taskid==0. "
                    "If not DEBUG mode, please check SYSTEM.run()",
                    header="warning",
                    border="="))
            sftaskid = 0
        return int(sftaskid)
Example #15
def call_solver(mpiexec, executable, output="solver.log"):
    """
    Calls MPI solver executable to run solver binaries, used by individual
    processes to run the solver on system. If the external solver returns a
    non-zero exit code (failure), this function exits the workflow with an
    error message.

    :type mpiexec: str
    :param mpiexec: call to mpi. If None (e.g., serial run), defaults to './'
    :type executable: str
    :param executable: executable function to call
    :type output: str
    :param output: where to redirect stdout
    """
    # mpiexec is None when running in serial mode, so e.g., ./xmeshfem2D
    if mpiexec is None:
        exc_cmd = f"./{executable}"
    # Otherwise mpiexec is system dependent (e.g., srun, mpirun)
    else:
        exc_cmd = f"{mpiexec} {executable}"

    try:
        # Write solver stdout (log files) to text file; 'with' ensures the
        # file is closed even if the solver call raises
        with open(output, "w") as f:
            subprocess.run(exc_cmd, shell=True, check=True, stdout=f)
    except (subprocess.CalledProcessError, OSError) as e:
        print(
            msg.cli(
                "The external numerical solver has returned a nonzero "
                "exit code (failure). Consider stopping any currently "
                "running jobs to avoid wasted computational resources. "
                f"Check 'scratch/solver/mainsolver/{output}' for the "
                f"solvers stdout log message. "
                f"The failing command and error message are: ",
                items=[f"exc: {exc_cmd}", f"err: {e}"],
                header="external solver error",
                border="="))
        sys.exit(-1)
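
Typical invocations, for illustration (binary paths hypothetical):

# Serial run (mpiexec=None) executes ./bin/xmeshfem2D
call_solver(mpiexec=None, executable="bin/xmeshfem2D", output="mesher.log")

# Parallel run executes 'mpirun -n 4 bin/xspecfem2D'
call_solver(mpiexec="mpirun -n 4", executable="bin/xspecfem2D")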
Example #16
    def taskid(self):
        """
        Provides a unique identifier for each running task

        :rtype: int
        :return: identifier for a given task
        """
        # If not set, this environment variable will return None
        sftaskid = os.getenv("SEISFLOWS_TASKID")

        if sftaskid is None:
            sftaskid = os.getenv("SLURM_ARRAY_TASK_ID")
            if sftaskid is None:
                print(
                    msg.cli(
                        "system.taskid() environment variable not found. "
                        "Assuming DEBUG mode and returning taskid==0. "
                        "If not DEBUG mode, please check SYSTEM.run()",
                        header="warning",
                        border="="))
                sftaskid = 0

        return int(sftaskid)
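
For context, SEISFLOWS_TASKID is expected to be injected by the system module when it launches tasks. A hypothetical launcher loop showing that hand-off (task count and script path are placeholders):

import os
import subprocess

ntask = 3  # hypothetical number of tasks
for task_id in range(ntask):
    # Label each spawned process so taskid() can recover its identifier
    env = dict(os.environ, SEISFLOWS_TASKID=str(task_id))
    subprocess.Popen(["python", "scripts/run_task.py"], env=env)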
Example #17
File: base.py Project: bch0w/seisflows
    def eval_grad(self, path, export_traces=False):
        """
        High level solver interface that evaluates gradient by carrying out
        adjoint simulations. A function evaluation must already have been
        carried out.

        :type path: str
        :param path: directory from which model is imported
        :type export_traces: bool
        :param export_traces: if True, save traces to OUTPUT.
            if False, discard traces
        """
        unix.cd(self.cwd)
        if self.taskid == 0:
            self.logger.debug("running adjoint simulations")

        # Check to make sure that preprocessing module created adjoint traces
        adjoint_traces_wildcard = os.path.join("traces", "adj", "*")
        if not glob(adjoint_traces_wildcard):
            print(
                msg.cli(
                    f"Event {self.source_name} has no adjoint traces, "
                    f"which will lead to an external solver error. "
                    f"Please check that solver.eval_func() executed "
                    f"properly",
                    border="=",
                    header="solver error"))
            sys.exit(-1)

        self.adjoint()
        self.export_kernels(path)

        if export_traces:
            self.export_traces(path=os.path.join(path, "traces", "syn"),
                               prefix="traces/syn")
            self.export_traces(path=os.path.join(path, "traces", "adj"),
                               prefix="traces/adj")
Example #18
File: base.py Project: bch0w/seisflows
    def check_stop_resume_cond(self, flow):
        """
        Check the stop-after and resume-from conditions

        Allow the main() function to resume a workflow from a given flow
        argument, or stop the workflow after a given argument. This is useful
        in the event that a previous workflow errored, or if the User
        previously stopped a workflow to look at results and wants to pick up
        where they left off.

        Late check: Exits the workflow if RESUME_FROM or STOP_AFTER arguments
        do not match any of the given flow arguments.

        :type flow: tuple of functions
        :param flow: an ordered list of functions that will be executed
        :rtype: tuple of int
        :return: (start, stop) indices of the `flow` input dictating where the
            list should be begun and ended. If RESUME_FROM and STOP_AFTER
            conditions are NOT given by the user, start and stop will both be
            None, meaning the ENTIRE list will be executed
        """
        fxnames = [func.__name__ for func in flow]

        # Default values which dictate that flow will execute in its entirety
        start_idx = None
        stop_idx = None

        # Overwrite start_idx if RESUME_FROM given, exit condition if no match
        if PAR.RESUME_FROM:
            try:
                start_idx = fxnames.index(PAR.RESUME_FROM)
                fx_name = flow[start_idx].__name__
                self.logger.info(
                    msg.mnr(f"WORKFLOW WILL RESUME FROM FUNC: '{fx_name}'"))
            except ValueError:
                self.logger.info(
                    msg.cli(
                        f"{PAR.RESUME_FROM} does not correspond to any FLOW "
                        f"functions. Please check that PAR.RESUME_FROM "
                        f"matches one of the functions listed out in "
                        f"`seisflows print flow`.",
                        header="error",
                        border="="))
                sys.exit(-1)

        # Overwrite stop_idx if STOP_AFTER provided, exit condition if no match
        if PAR.STOP_AFTER:
            try:
                stop_idx = fxnames.index(PAR.STOP_AFTER)
                fx_name = flow[stop_idx].__name__
                stop_idx += 1  # increment to stop AFTER, due to python indexing
                self.logger.info(
                    msg.mnr(f"WORKFLOW WILL STOP AFTER FUNC: '{fx_name}'"))
            except ValueError:
                self.logger.info(
                    msg.cli(
                        f"{PAR.STOP_AFTER} does not correspond to any FLOW "
                        f"functions. Please check that PAR.STOP_AFTER "
                        f"matches one of the functions listed out in "
                        f"`seisflows print flow`.",
                        header="error",
                        border="="))
                sys.exit(-1)

        # Make sure stop after doesn't come before resume_from, otherwise none
        # of the flow will execute
        if PAR.STOP_AFTER and PAR.RESUME_FROM:
            if stop_idx <= start_idx:
                self.logger.info(
                    msg.cli(
                        f"PAR.STOP_AFTER=='{PAR.STOP_AFTER}' is called "
                        f"before PAR.RESUME_FROM=='{PAR.RESUME_FROM}' in "
                        f"the FLOW functions. Please adjust accordingly "
                        f"and rerun.",
                        header="error",
                        border="="))
                sys.exit(-1)

        return start_idx, stop_idx
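
In main(), the returned indices would then slice the flow, as described in the docstring above (a sketch; note that flow[None:None] covers the entire tuple):

start, stop = self.check_stop_resume_cond(flow)
for func in flow[start:stop]:
    func()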
Example #19
    def run(self, classname, method, single=False, run_call=None, **kwargs):
        """
        Runs task multiple times in embarrassingly parallel fashion on a SLURM
        cluster. Executes classname.method(*args, **kwargs) `NTASK` times,
        each time on `NPROC` CPU cores

        .. note::
            The actual CLI call structure looks something like this
            $ sbatch --args scripts/run OUTPUT class method environs

        :type classname: str
        :param classname: the class to run
        :type method: str
        :param method: the method from the given `classname` to run
        :type single: bool
        :param single: run a single-process, non-parallel task, such as
            smoothing the gradient, which only needs to be run once.
            This will change how the job array and the number of tasks is
            defined, such that the job is submitted as a single-core job to
            the system.
        :type run_call: str
        :param run_call: subclasses (e.g., specific SLURM cluster subclasses)
            can overload the sbatch command line input by setting
            run_call. If set to None, default run_call will be set here.
        """
        self.checkpoint(PATH.OUTPUT, classname, method, kwargs)

        # Default sbatch command line input, can be overloaded by subclasses
        # Copy-paste this default run_call and adjust accordingly for subclass
        if run_call is None:
            run_call = " ".join([
                "sbatch", f"{PAR.SLURMARGS or ''}", f"--job-name={PAR.TITLE}",
                f"--nodes={math.ceil(PAR.NPROC/float(PAR.NODESIZE)):d}",
                f"--ntasks-per-node={PAR.NODESIZE:d}",
                f"--ntasks={PAR.NPROC:d}", f"--time={PAR.TASKTIME:d}",
                f"--output={os.path.join(PATH.WORKDIR, 'logs', '%A_%a')}",
                f"--array=0-{PAR.NTASK-1 % PAR.NTASKMAX}",
                f"{os.path.join(ROOT_DIR, 'scripts', 'run')}",
                f"--output {PATH.OUTPUT}", f"--classname {classname}",
                f"--funcname {method}", f"--environment {PAR.ENVIRONS or ''}"
            ])
            self.logger.debug(run_call)

        # Single-process jobs simply need to replace a few sbatch arguments.
        # Do it AFTER `run_call` has been defined so that subclasses submitting
        # custom run calls can still benefit from this
        if single:
            self.logger.info("replacing parts of sbatch run call for single "
                             "process job")
            for part in run_call.split(" "):
                if "--array" in part:
                    run_call = run_call.replace(part, "--array=0-0")
                elif "--ntasks" in part:
                    run_call = run_call.replace(part, "--ntasks=1")
            # Append taskid to environment variable, deal with the case where
            # PAR.ENVIRONS is an empty string
            task_id_str = "SEISFLOWS_TASKID=0"
            if not run_call.strip().endswith("--environment"):
                task_id_str = f",{task_id_str}"  # appending to the list of vars
            run_call += task_id_str
            self.logger.debug(run_call)

        # The standard response from SLURM when submitting jobs
        # is something like 'Submitted batch job 441636', we want job number
        stdout = subprocess.run(run_call,
                                stdout=subprocess.PIPE,
                                text=True,
                                shell=True).stdout
        job_ids = job_id_list(stdout, single)

        # Continuously check for job completion on ALL running array jobs
        is_done = False
        count = 0
        bad_states = [
            "TIMEOUT", "FAILED", "NODE_FAIL", "OUT_OF_MEMORY", "CANCELLED"
        ]
        while not is_done:
            # Wait a bit to avoid rapidly querying sacct
            time.sleep(5)
            is_done, states = job_array_status(job_ids)
            # EXIT CONDITION: if any of the jobs provide job failure codes
            if not is_done:
                for i, state in enumerate(states):
                    # Sometimes states can be something like 'CANCELLED+', so
                    # we can't do exact string matching, check partial matches
                    if any([check in state for check in bad_states]):
                        print(
                            msg.cli((f"Stopping workflow for {state} job. "
                                     f"Please check log file for details."),
                                    items=[
                                        f"TASK:    {classname}.{method}",
                                        f"TASK ID: {job_ids[i]}",
                                        f"LOG:     logs/{job_ids[i]}",
                                        f"SBATCH:  {run_call}"
                                    ],
                                    header="slurm run error",
                                    border="="))
                        sys.exit(-1)
            # WAIT CONDITION: if sacct is not working, we'll get stuck in a loop
            if "UNDEFINED" in states:
                count += 1
                # Every 10 counts, warn the user this is unexpected behavior
                if not count % 10:
                    job_id = job_ids[states.index("UNDEFINED")]
                    self.logger.warning(
                        f"SLURM command 'sacct {job_id}' has "
                        f"returned unexpected response {count} "
                        f"times. This job may have failed "
                        f"unexpectedly. Consider checking "
                        f"manually")

        self.logger.info(f"Task {classname}.{method} finished successfully")
Example #20
        if self.run_example:
            print(msg.cli("RUNNING SEISFLOWS3 INVERSION WORKFLOW", border="="))
            self.run_sf3_example()


if __name__ == "__main__":
    print(msg.ascii_logo_small)
    print(
        msg.cli(
            f"This is a [SPECFEM2D] [WORKSTATION] example, which will "
            f"run an inversion to assess misfit between two homogeneous halfspace "
            f"models with slightly different velocities. [3 events, 1 station, 2 "
            f"iterations]. The tasks involved include: ",
            items=[
                "1. (optional) Download, configure, compile SPECFEM2D",
                "2. Set up a SPECFEM2D working directory",
                "3. Generate starting model from Tape2007 example",
                "4. Generate target model w/ perturbed starting model",
                "5. Set up a SeisFlows3 working directory",
                f"6. Run an inversion workflow"
            ],
            header="seisflows3 example 1",
            border="="))

    # Dynamically traverse sys.argv to get user-input command line. Cannot
    # use argparse here because we're being called by the SeisFlows CLI tool,
    # which is occupying argparse
    if len(sys.argv) > 1:
        sf3ex2d = SF3Example2D()
        sf3ex2d.main()
Example #21
    def check_solver_parameter_files(self):
        """
        Checks SPECFEM2D Par_file for acceptable parameters and matches with
        the internally set parameters
        """
        # Check the number of steps in the SPECFEM2D Par_file
        nt_str, nt, nt_i = getpar(key="NSTEP", file="DATA/Par_file")
        if int(nt) != PAR.NT:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM2D {nt_str}=={nt} is not equal "
                        f"SeisFlows3 PAR.NT=={PAR.NT}. Please ensure "
                        f"that these values match in both files.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        dt_str, dt, dt_i = getpar(key="DT", file="DATA/Par_file")
        if float(dt) != PAR.DT:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM2D {dt_str}=={dt} is not equal "
                        f"SeisFlows3 PAR.DT=={PAR.DT}. Please ensure "
                        f"that these values match in both files.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        # Check the central frequency in the SPECFEM2D SOURCE file
        f0_str, f0, f0_i = getpar(key="f0", file="DATA/SOURCE")
        if float(f0) != PAR.F0:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM2D {f0_str}=={f0} is not equal "
                        f"SeisFlows3 PAR.F0=={PAR.F0}. Please ensure "
                        f"that these values match the DATA/SOURCE file.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        # Ensure that NPROC matches the MESH values
        nproc = self.mesh_properties.nproc
        if nproc != PAR.NPROC:
            if self.taskid == 0:
                print(
                    msg.cli(
                        f"SPECFEM2D mesh NPROC=={nproc} is not equal"
                        f"SeisFlows3 PAR.NPROC=={PAR.NPROC}. "
                        f"Please check that your mesh matches this val.",
                        header="parameter match error",
                        border="="))
                sys.exit(-1)

        if "MULTIPLES" in PAR:
            if PAR.MULTIPLES:
                setpar(key="absorbtop", val=".false.", file="DATA/Par_file")
            else:
                setpar(key="absorbtop", val=".true.", file="DATA/Par_file")
Example #22
File: base.py Project: bch0w/seisflows
    def write_gradient(self, path):
        """
        Combines contributions from individual sources and material parameters
        to get the gradient, and optionally applies user-supplied scaling

        .. note::
            Because processing operations can be quite expensive, they must be
            run through the HPC system interface. Since processing does not
            involve embarrassingly parallel tasks, we use run(single=True)

        :type path: str
        :param path: directory from which kernels are read and to which
            gradient is written
        """
        if not os.path.exists(path):
            print(
                msg.cli(
                    "Gradient path does in postprocess.write_gradient "
                    "does not exist but should",
                    items=[path],
                    header="error"))
            sys.exit(-1)

        # Postprocess file structure defined here once-and-for-all
        path_grad = os.path.join(path, "gradient")
        path_grad_nomask = os.path.join(path, "gradient_nomask")
        path_kernels = os.path.join(path, "kernels")
        path_kernels_sum = os.path.join(path_kernels, "sum")
        path_model = os.path.join(path, "model")

        # Run postprocessing as job on system as it's computationally intensive
        self.logger.info("processing kernels into gradient on system...")
        system.run("postprocess",
                   "process_kernels",
                   single=True,
                   path=path_kernels,
                   logger=self.logger)

        # Access the gradient information stored in the kernel summation
        gradient = solver.load(path_kernels_sum, suffix="_kernel")

        # Merge the gradients into a single vector
        gradient = solver.merge(gradient)

        # Convert to absolute perturbations:
        # log dm --> dm (see Eq.13 Tromp et al 2005)
        gradient *= solver.merge(solver.load(path_model))

        if PATH.MASK:
            self.logger.info(f"masking gradient")
            # to scale the gradient, users can supply "masks" by exactly
            # mimicking the file format in which models are stored
            mask = solver.merge(solver.load(PATH.MASK))

            # While both masking and preconditioning involve scaling the
            # gradient, they are fundamentally different operations:
            # masking is ad hoc, preconditioning is a change of variables;
            # For more info, see Modrak & Tromp 2016 GJI
            solver.save(solver.split(gradient),
                        path=path_grad_nomask,
                        suffix="_kernel")

            solver.save(solver.split(gradient * mask),
                        path=path_grad,
                        suffix="_kernel")
        else:
            solver.save(solver.split(gradient),
                        path=path_grad,
                        suffix="_kernel")
Example #23
def custom_import(name=None, module=None, classname=None):
    """
    Imports SeisFlows module and extracts class that is the camelcase version
    of the module name

    For example:
        custom_import('workflow', 'inversion')

        imports 'seisflows.workflow.inversion' and, from this module, extracts
        class 'Inversion'.

    :type name: str
    :param name: component of the workflow to import, defined by `names`,
        available: "system", "preprocess", "solver",
                   "postprocess", "optimize", "workflow"
    :type module: str
    :param module: module within the workflow component to call upon, e.g.,
        seisflows.workflow.inversion, where `inversion` is the module
    :type classname: str
    :param classname: the class to be called from the module. Usually this is
        just the CamelCase version of the module, which will be defaulted to if
        this parameter is set `None`, however allows for custom class naming.
        Note: CamelCase class names follow the PEP-8 convention.
    """
    # Parse input arguments for custom import
    # Allow empty system to be called so that import error message can be thrown
    if name is None:
        print(
            msg.cli(
                "Please check that 'custom_import' utility is being used as "
                "follows: custom_import(name, module). The resulting full dotted "
                "name 'seisflows3.name.module' must correspond to a module "
                "within this package.",
                header="custom import error",
                border="="))
        sys.exit(-1)
    # Invalid `system` call
    elif name not in NAMES:
        print(
            msg.cli(
                "Please check that the use of custom_import(name, module, class) "
                "is implemented correctly, where name must be in the following:",
                items=NAMES,
                header="custom import error",
                border="="))
        sys.exit(-1)
    # Attempt to retrieve currently assigned classname from parameters
    if module is None:
        try:
            module = sys.modules[PAR][name.upper()]
        except KeyError:
            return Null
        # If this still returns nothing, then no module has been assigned
        # likely the User has turned this module OFF
        if module is None:
            return Null
    # If no method specified, convert classname to PEP-8
    if classname is None:
        # Make a distinction for fully uppercase classnames, e.g. LBFGS
        if module.isupper():
            classname = module.upper()
        # If normal classname, convert to CamelCase
        else:
            classname = module.title().replace("_", "")

    # Check if modules exist, otherwise raise custom exception
    _exists = False
    for package in PACKAGES:
        full_dotted_name = ".".join([package, name, module])
        if module_exists(full_dotted_name):
            _exists = True
            break
    if not _exists:
        print(
            msg.cli(
                f"The following module was not found within the package: "
                f"seisflows3.{name}.{module}",
                header="custom import error",
                border="="))
        sys.exit(-1)

    # If importing the module doesn't work, throw an error. Usually this happens
    # when an external dependency isn't available, e.g., Pyatoa
    try:
        module = import_module(full_dotted_name)
    except Exception as e:
        print(
            msg.cli(f"Module could not be imported {full_dotted_name}",
                    items=[str(e)],
                    header="custom import error",
                    border="="))
        traceback.print_exc()
        sys.exit(-1)

    # Extract classname from module if possible
    try:
        return getattr(module, classname)
    except AttributeError:
        print(
            msg.cli(f"The following method was not found in the imported "
                    f"class: seisflows3.{name}.{module}.{classname}"))
        sys.exit(-1)
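
Usage mirrors the docstring example above:

# Fetch the Inversion class from seisflows.workflow.inversion and instantiate
Inversion = custom_import("workflow", "inversion")
workflow = Inversion()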
Example #24
def init_seisflows(check=True):
    """
    Instantiates SeisFlows3 objects and makes them globally accessible by
    registering them in sys.modules

    :type check: bool
    :param check: Run parameter and path checking, defined in the module.check()
        functions. By default should be True, to ensure that paths and
        parameters are set correctly. It should only be set False for debug
        and testing purposes when we need to force our way past this safeguard.
    """
    logger.info("initializing SeisFlows3 in sys.modules")

    # Parameters and paths must already be loaded (normally done by submit)
    assert (PAR in sys.modules)
    assert (PATH in sys.modules)

    # Check if objects already exist on disk, exit so as to not overwrite
    if "OUTPUT" in sys.modules[PATH] and \
            os.path.exists(sys.modules[PATH]["OUTPUT"]):
        print(
            msg.cli("Data from previous workflow found in working directory.",
                    items=[
                        "> seisflows restart: delete data and start new "
                        "workflow",
                        "> seisflows resume: resume existing workflow"
                    ],
                    header="warning",
                    border="="))
        sys.exit(-1)

    # Instantiate and register objects
    for name in NAMES:
        sys.modules[f"seisflows_{name}"] = custom_import(name)()

    # Parameter import error checking; missing or improperly set parameters
    # will throw assertion errors
    if check:
        errors = []
        for name in NAMES:
            try:
                sys.modules[f"seisflows_{name}"].check()
            except AssertionError as e:
                errors.append(f"{name}: {e}")
        if errors:
            print(
                msg.cli("seisflows.config module check failed with:",
                        items=errors,
                        header="module check error",
                        border="="))
            sys.exit(-1)

    # Bare minimum module requirements for SeisFlows3
    req_modules = ["WORKFLOW", "SYSTEM"]
    for req in req_modules:
        if not hasattr(sys.modules[PAR], req):
            print(
                msg.cli(
                    f"SeisFlows3 requires defining: {req_modules}."
                    "Please specify these in the parameter file. Use "
                    "'seisflows print module' to determine suitable "
                    "choices.",
                    header="error",
                    border="="))
            sys.exit(-1)
        print(f"> EX2: Using {self.nsta} stations in this inversion workflow")
        with open("STATIONS", "w") as f:
            f.writelines(lines[:self.nsta])


if __name__ == "__main__":
    print(msg.ascii_logo_small)
    print(
        msg.cli(
            f"This is a [SPECFEM2D] [WORKSTATION] example, which will "
            f"run an inversion to assess misfit between a homogeneous halfspace  "
            f"and checkerboard model using Pyatoa for misfit quantification "
            f"[2 events, 5 stations, 1 iterations]. The tasks involved include: ",
            items=[
                "1. (optional) Download, configure, compile SPECFEM2D",
                "2. Set up a SPECFEM2D working directory",
                "3. Generate starting model from Tape2007 example",
                "4. Generate target model w/ perturbed starting model",
                "5. Set up a SeisFlows3 working directory",
                f"6. Run an inversion workflow. The line search is expected to "
                f"attempt 4 evaluations (i01s04)"
            ],
            header="seisflows3 example 2",
            border="="))

    # Dynamically traverse sys.argv to get user-input command line. Cannot
    # use argparse here because we're being called by the SeisFlows CLI tool,
    # which is occupying argparse
    if len(sys.argv) > 1:
        sf3ex2d = SF3PyatoaEx2D()
        sf3ex2d.main()