class tiger_lg(custom_import('system', 'slurm_lg')):
    """ System interface tailored to tiger.princeton.edu

      Only site-specific defaults and a convenience symlink are added here;
      everything else is delegated to the parent class.
    """

    def check(self):
        """ Fills in tiger-specific defaults, then runs the parent checks
        """
        # a unique id keeps scratch directories of separate runs apart
        if 'UUID' not in PAR:
            PAR.UUID = str(uuid4())

        if 'SCRATCH' not in PATH:
            PATH.SCRATCH = join(
                '/scratch/gpfs', getuser(), 'seisflows', PAR.UUID)

        if 'LOCAL' not in PATH:
            PATH.LOCAL = ''

        if 'NODESIZE' not in PAR:
            PAR.NODESIZE = 16

        super(tiger_lg, self).check()

    def submit(self, *args, **kwargs):
        """ Links the scratch directory into PATH.SUBMIT, then submits the job
        """
        # create the link only once; later submissions reuse it
        if not exists(PATH.SUBMIT + '/' + 'scratch'):
            unix.ln(PATH.SCRATCH, PATH.SUBMIT + '/' + 'scratch')

        super(tiger_lg, self).submit(*args, **kwargs)
class specfem3d_workaround(custom_import('solver', 'specfem3d')):
    """ SPECFEM3D solver interface with an extra coordinate-aware loader

      See base class for method descriptions
    """

    def check(self):
        """ Runs the parent parameter and path checks unchanged
        """
        super(specfem3d_workaround, self).check()

    def load_xyz(self, *args, **kwargs):
        """ Loads a SPECFEM model or kernel together with x, y, z locations

          All arguments are forwarded to the parent ``load``; the first
          positional argument is additionally used as the directory from
          which the 'x_loc', 'y_loc' and 'z_loc' database files are read, one
          entry per processor rank.
        """
        model = super(specfem3d_workaround, self).load(*args, **kwargs)
        model_path = args[0]

        coordinate_keys = ['x_loc', 'y_loc', 'z_loc']
        for name in coordinate_keys:
            model[name] = []

        # one database file per coordinate and per processor
        for iproc in range(self.mesh.nproc):
            keys, vals = loadbypar(
                model_path, list(coordinate_keys), iproc, '', '')
            for key, val in zip(keys, vals):
                model[key] += [val]

        return model
class tikhonov1(custom_import('postprocess', 'regularize')):
    """ First-order regularization term built on the 'regularize' base class

      The derivative is evaluated with the module-level "nabla" operator
      (order=1) after interpolating from the unstructured mesh onto a regular
      grid. So far, can only be used for 2D inversion, because the required
      spatial derivative operator "nabla" is not yet available for 3D grids.
    """

    def check(self):
        """ Runs the parent checks, defaults CREEPING, and requires LAMBDA
        """
        super(tikhonov1, self).check()

        if 'CREEPING' not in PAR:
            PAR.CREEPING = False

        # a zero or empty LAMBDA is rejected
        if PAR.LAMBDA:
            return
        raise ValueError

    def nabla(self, mesh, m, g):
        """ Returns the first-order derivative term, scaled by mean(m)

          With PAR.CREEPING set, the operator is applied to the gradient g
          and the sign is flipped; otherwise it is applied to the model m.
        """
        creeping = PAR.CREEPING
        field = g if creeping else m

        # mesh -> grid, differentiate, grid -> mesh
        gridded, grid = mesh2grid(field, mesh)
        derivative = nabla(gridded, order=1)
        on_mesh = grid2mesh(derivative, grid, mesh)

        if creeping:
            return -on_mesh / np.mean(m)
        return on_mesh / np.mean(m)
class slurm_sm(custom_import('system', 'mpi')):
    """ An interface through which to submit workflows, run tasks in serial or
      parallel, and perform other system functions.

      By hiding environment details behind a python interface layer, these
      classes provide a consistent command set across different computing
      environments.

      Intermediate files are written to a global scratch path PATH.SCRATCH,
      which must be accessible to all compute nodes.

      Optionally, users can provide a local scratch path PATH.LOCAL if each
      compute node has its own local filesystem.

      For important additional information, please see
      http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration
    """

    def check(self):
        """ Runs the parent checks, then adds SLURM-specific defaults
        """
        super(slurm_sm, self).check()

        if 'WALLTIME' not in PAR:
            PAR.WALLTIME = 30.

        if 'SLURMARGS' not in PAR:
            PAR.SLURMARGS = ''

    def submit(self, workflow):
        """ Hands the workflow to sbatch through the 'submit' wrapper script
        """
        unix.mkdir(PATH.OUTPUT)
        unix.cd(PATH.OUTPUT)
        self.checkpoint()

        # pieces are listed in the order they appear on the command line
        command = ''.join([
            'sbatch ',
            '%s ' % PAR.SLURMARGS,
            '--job-name=%s ' % PAR.TITLE,
            '--output=%s ' % (PATH.SUBMIT + '/' + 'output.log'),
            '--cpus-per-task=%d ' % PAR.NPROC,
            '--ntasks=%d ' % PAR.NTASK,
            '--time=%d ' % PAR.WALLTIME,
            findpath('seisflows.system'), '/', 'wrappers/submit ',
            PATH.OUTPUT,
        ])
        call(command)
class tikhonov0(custom_import('postprocess', 'regularize')):
    """ Zeroth-order regularization term built on the 'regularize' base class

      The term returned by ``nabla`` is simply the model itself, normalized
      by its mean; no spatial derivative is involved.
    """

    def check(self):
        """ Runs the parent checks and requires a nonzero LAMBDA
        """
        super(tikhonov0, self).check()

        if PAR.LAMBDA:
            return
        raise ValueError

    def nabla(self, mesh, m, g):
        """ Returns m divided by its mean; mesh and g are not used
        """
        scale = np.mean(m)
        return m / scale
class total_variation(custom_import('postprocess', 'regularize')):
    """ Total variation regularization built on the 'regularize' base class

      So far, can only be used for 2D inversion, because the required spatial
      derivative operator "nabla" is not yet available for 3D grids.
    """

    def check(self):
        """ Runs the parent checks, requires LAMBDA, and defaults EPSILON
        """
        super(total_variation, self).check()

        # LAMBDA is validated before any default is written
        if not PAR.LAMBDA:
            raise ValueError

        if hasattr(PAR, 'EPSILON'):
            return
        PAR.EPSILON = 0.

    def nabla(self, mesh, m, g):
        """ Applies the "tv" operator to g on a regular grid, scaled by mean(m)
        """
        # mesh -> grid, apply operator, grid -> mesh
        gridded, grid = mesh2grid(g, mesh)
        variation = tv(gridded, epsilon=PAR.EPSILON)
        on_mesh = grid2mesh(variation, grid, mesh)
        return on_mesh / np.mean(m)
class lsf_lg(custom_import('system', 'base')):
    """ An interface through which to submit workflows, run tasks in serial or
      parallel, and perform other system functions.

      By hiding environment details behind a python interface layer, these
      classes provide a consistent command set across different computing
      environments.

      Intermediate files are written to a global scratch path PATH.SCRATCH,
      which must be accessible to all compute nodes.

      Optionally, users can provide a local scratch path PATH.LOCAL if each
      compute node has its own local filesystem.

      For important additional information, please see
      http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration
    """

    def check(self):
        """ Checks parameters and paths

          Raises ParameterError if NTASK, NPROC or NODESIZE is missing.
        """
        # check parameters
        if 'TITLE' not in PAR:
            setattr(PAR, 'TITLE', basename(abspath('.')))

        if 'WALLTIME' not in PAR:
            setattr(PAR, 'WALLTIME', 30.)

        if 'STEPTIME' not in PAR:
            setattr(PAR, 'STEPTIME', 30.)

        if 'SLEEPTIME' not in PAR:
            setattr(PAR, 'SLEEPTIME', 1.)

        if 'VERBOSE' not in PAR:
            setattr(PAR, 'VERBOSE', 1)

        if 'NTASK' not in PAR:
            raise ParameterError(PAR, 'NTASK')

        if 'NPROC' not in PAR:
            raise ParameterError(PAR, 'NPROC')

        if 'NODESIZE' not in PAR:
            raise ParameterError(PAR, 'NODESIZE')

        if 'LSFARGS' not in PAR:
            setattr(PAR, 'LSFARGS', '')

        # check paths
        if 'SCRATCH' not in PATH:
            setattr(PATH, 'SCRATCH', join(abspath('.'), 'scratch'))

        if 'LOCAL' not in PATH:
            setattr(PATH, 'LOCAL', None)

        if 'SUBMIT' not in PATH:
            setattr(PATH, 'SUBMIT', abspath('.'))

        if 'OUTPUT' not in PATH:
            setattr(PATH, 'OUTPUT', join(PATH.SUBMIT, 'output'))

        if 'SYSTEM' not in PATH:
            setattr(PATH, 'SYSTEM', join(PATH.SCRATCH, 'system'))

    def submit(self, workflow):
        """ Submits workflow to LSF through the 'submit' wrapper script
        """
        unix.mkdir(PATH.OUTPUT)
        unix.cd(PATH.OUTPUT)
        unix.mkdir(PATH.SUBMIT + '/' + 'output.lsf')

        self.checkpoint()

        # prepare bsub arguments
        # NOTE(review): '-W %d:00' places WALLTIME in the field before the
        # colon -- confirm the intended unit against the bsub documentation
        call('bsub '
             + '%s ' % PAR.LSFARGS
             + '-J %s ' % PAR.TITLE
             + '-o %s ' % (PATH.SUBMIT + '/' + 'output.log')
             + '-n %d ' % PAR.NODESIZE
             + '-e %s ' % (PATH.SUBMIT + '/' + 'error.log')
             + '-R "span[ptile=%d]" ' % PAR.NODESIZE
             + '-W %d:00 ' % PAR.WALLTIME
             + findpath('seisflows.system') + '/' + 'wrappers/submit '
             + PATH.OUTPUT)

    def run(self, classname, funcname, hosts='all', **kwargs):
        """ Runs tasks in serial or parallel on specified hosts.

          Blocks until ``task_status`` reports that every launched job is
          done, polling once every 60*PAR.SLEEPTIME seconds.
        """
        # NOTE(review): submit() calls self.checkpoint() where this method
        # calls self.save_objects() -- confirm the base class provides both
        self.save_objects()
        self.save_kwargs(classname, funcname, kwargs)
        jobs = self.launch(classname, funcname, hosts)

        while True:
            # wait before checking status
            time.sleep(60 * PAR.SLEEPTIME)
            self.timestamp()
            isdone, jobs = self.task_status(classname, funcname, jobs)
            if isdone:
                return

    def launch(self, classname, funcname, hosts='all'):
        """ Submits a job array with bsub and returns the list of job ids
        """
        unix.mkdir(PATH.SYSTEM)

        # submit job; bsub's stdout is captured so the job id can be parsed.
        # The double quote opened after '-J' is closed by launch_args().
        with open(PATH.SYSTEM + '/' + 'job_id', 'w') as f:
            call('bsub '
                 + '%s ' % PAR.LSFARGS
                 + '-n %d ' % PAR.NPROC
                 + '-R "span[ptile=%d]" ' % PAR.NODESIZE
                 + '-W %d:00 ' % PAR.STEPTIME
                 + '-J "%s' % PAR.TITLE
                 + self.launch_args(hosts)
                 + findpath('seisflows.system') + '/' + 'wrappers/run '
                 + PATH.OUTPUT + ' '
                 + classname + ' '
                 + funcname + ' ',
                 stdout=f)

        # retrieve job ids
        with open(PATH.SYSTEM + '/' + 'job_id', 'r') as f:
            line = f.readline()
            # second whitespace-separated token, minus its first and last
            # character (presumably the '<...>' around the id -- confirm)
            job_buf = line.split()[1].strip()
            job = job_buf[1:-1]

        if hosts == 'all' and PAR.NSRC > 1:
            nn = range(1, PAR.NSRC + 1)
            return [job + '[' + str(ii) + ']' for ii in nn]
        else:
            return [job]

    def task_status(self, classname, funcname, jobs):
        """ Determines completion status of one or more jobs

          Returns (isdone, jobs), where isdone is True only if every job is
          in state 'DONE'. Exits the process if any job is in state 'EXIT'.
        """
        # accumulate over ALL jobs; resetting this list inside the loop would
        # let the last job alone decide completion
        states = []
        for job in jobs:
            state = self.getstate(job)
            if state in ['DONE']:
                states += [1]
            else:
                states += [0]
            if state in ['EXIT']:
                print('LSF job failed: %s ' % job)
                print(msg.TaskError_LSF % (classname, funcname, job))
                sys.exit(-1)
        isdone = all(states)

        return isdone, jobs

    def launch_args(self, hosts):
        """ Returns the remainder of the '-J' argument plus the '-o' argument

          ``launch`` emits ``-J "<title>`` immediately before this string, so
          the returned value starts with the job array specification, closes
          the double quote, and is followed by the output file option.

          Raises KeyError if hosts is neither 'all' nor 'head'.
        """
        if hosts == 'all':
            args = ''
            # array of NSRC elements, at most NTASK running concurrently
            args += '[%d-%d]%%%d" ' % (1, PAR.NSRC, PAR.NTASK)
            args += '-o %s ' % (PATH.SUBMIT + '/' + 'output.lsf/' + '%J_%I')

        elif hosts == 'head':
            args = ''
            args += '[%d-%d]" ' % (1, 1)
            args += '-o %s ' % (PATH.SUBMIT + '/' + 'output.lsf/' + '%J')

        else:
            raise KeyError('Hosts parameter not set/recognized.')

        return args

    def mpiexec(self):
        """ Specifies MPI executable; used to invoke solver
        """
        return 'mpiexec '

    def getstate(self, jobid):
        """ Retrieves job state from LSF database

          Writes the output of 'bjobs' to PATH.SYSTEM/job_status and returns
          the third column of its second line.
        """
        with open(PATH.SYSTEM + '/' + 'job_status', 'w') as f:
            call('bjobs -a -d "' + jobid + '"', stdout=f)

        with open(PATH.SYSTEM + '/' + 'job_status', 'r') as f:
            lines = f.readlines()
            state = lines[1].split()[2].strip()

        return state

    def getnode(self):
        """ Gets number of running task (LSB_JOBINDEX shifted to start at 0)
        """
        return int(os.getenv('LSB_JOBINDEX')) - 1

    def timestamp(self):
        """ Appends the current wall-clock time to PATH.SYSTEM/timestamps
        """
        with open(PATH.SYSTEM + '/' + 'timestamps', 'a') as f:
            line = time.strftime('%H:%M:%S') + '\n'
            f.write(line)

    def save_kwargs(self, classname, funcname, kwargs):
        """ Saves keyword arguments under PATH.OUTPUT/SeisflowsObjects
        """
        kwargspath = join(PATH.OUTPUT, 'SeisflowsObjects',
                          classname + '_kwargs')
        kwargsfile = join(kwargspath, funcname + '.p')
        unix.mkdir(kwargspath)
        saveobj(kwargsfile, kwargs)
import numpy as np

from seisflows.tools import unix
from seisflows.tools.array import loadnpy, savenpy
from seisflows.tools.code import exists
from seisflows.tools.config import SeisflowsParameters, SeisflowsPaths, \
    custom_import, ParameterError

# shared parameter and path containers
PAR = SeisflowsParameters()
PATH = SeisflowsPaths()

# NOTE(review): these imports come after PAR/PATH are created; the order is
# kept as-is because it may be significant -- confirm before moving them
import solver
import postprocess

# the migration workflow is instantiated at import time; test_postprocess
# delegates its checks to this instance
migration = custom_import('workflow','migration')()


class test_postprocess(object):
    """ Postprocessing class

      Only ``check`` is defined here; it reuses the checks of the migration
      workflow instance created at module level.
    """

    def check(self):
        """ Checks parameters and paths

          Runs the migration workflow's checks, then defaults PATH.INPUT to
          None when it has not been set.
        """
        migration.check()

        if 'INPUT' not in PATH:
            setattr(PATH, 'INPUT', None)
class pbs_lg(custom_import('system', 'base')):
    """ An interface through which to submit workflows, run tasks in serial or
      parallel, and perform other system functions.

      By hiding environment details behind a python interface layer, these
      classes provide a consistent command set across different computing
      environments.

      Intermediate files are written to a global scratch path PATH.SCRATCH,
      which must be accessible to all compute nodes.

      Optionally, users can provide a local scratch path PATH.LOCAL if each
      compute node has its own local filesystem.

      For important additional information, please see
      http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration
    """

    def check(self):
        """ Checks parameters and paths

          Raises ParameterError if NTASK, NPROC or NODESIZE is missing.
        """
        # check parameters
        if 'TITLE' not in PAR:
            setattr(PAR, 'TITLE', basename(abspath('.')))

        if 'WALLTIME' not in PAR:
            setattr(PAR, 'WALLTIME', 30.)

        if 'STEPTIME' not in PAR:
            setattr(PAR, 'STEPTIME', 30.)

        if 'SLEEPTIME' not in PAR:
            setattr(PAR, 'SLEEPTIME', 1.)

        if 'VERBOSE' not in PAR:
            setattr(PAR, 'VERBOSE', 1)

        if 'NTASK' not in PAR:
            raise ParameterError(PAR, 'NTASK')

        if 'NPROC' not in PAR:
            raise ParameterError(PAR, 'NPROC')

        if 'NODESIZE' not in PAR:
            raise ParameterError(PAR, 'NODESIZE')

        if 'PBSARGS' not in PAR:
            setattr(PAR, 'PBSARGS', '')

        # check paths
        if 'SCRATCH' not in PATH:
            setattr(PATH, 'SCRATCH', join(abspath('.'), 'scratch'))

        if 'LOCAL' not in PATH:
            setattr(PATH, 'LOCAL', None)

        if 'SUBMIT' not in PATH:
            setattr(PATH, 'SUBMIT', abspath('.'))

        if 'OUTPUT' not in PATH:
            setattr(PATH, 'OUTPUT', join(PATH.SUBMIT, 'output'))

        if 'SYSTEM' not in PATH:
            setattr(PATH, 'SYSTEM', join(PATH.SCRATCH, 'system'))

    def submit(self, workflow):
        """ Submits workflow to PBS through the 'submit' wrapper script
        """
        unix.mkdir(PATH.OUTPUT)
        unix.cd(PATH.OUTPUT)
        unix.mkdir(PATH.SUBMIT + '/' + 'output.pbs')

        self.checkpoint()

        # WALLTIME is split into hours and minutes for the HH:MM:00 format
        hours, minutes = divmod(PAR.WALLTIME, 60)
        walltime = 'walltime=%02d:%02d:00 ' % (hours, minutes)

        ncpus = PAR.NODESIZE
        mpiprocs = PAR.NODESIZE

        # prepare qsub arguments
        call('qsub '
             + '%s ' % PAR.PBSARGS
             + '-l select=1:ncpus=%d:mpiprocs=%d ' % (ncpus, mpiprocs)
             + '-l %s ' % walltime
             + '-N %s ' % PAR.TITLE
             + '-j %s ' % 'oe'
             + '-o %s ' % (PATH.SUBMIT + '/' + 'output.log')
             + '-V '
             + ' -- ' + findpath('seisflows.system') + '/' + 'wrappers/submit '
             + PATH.OUTPUT)

    def run(self, classname, funcname, hosts='all', **kwargs):
        """ Runs tasks in serial or parallel on specified hosts.

          Blocks until ``_status`` reports that every launched job is done,
          polling once every 60*PAR.SLEEPTIME seconds.
        """
        self.checkpoint()

        self.save_kwargs(classname, funcname, kwargs)
        jobs = self._launch(classname, funcname, hosts)
        while True:
            time.sleep(60. * PAR.SLEEPTIME)
            self._timestamp()
            isdone, jobs = self._status(classname, funcname, jobs)
            if isdone:
                return

    def mpiexec(self):
        """ Specifies MPI executable; used to invoke solver
        """
        return 'mpiexec '

    def getnode(self):
        """ Gets number of running task

          Returns PBS_ARRAY_INDEX as an int. Raises Exception if the
          environment variable is not defined.
        """
        # os.getenv returns None rather than raising when the variable is
        # unset, so the missing case has to be tested for explicitly
        index = os.getenv('PBS_ARRAY_INDEX')
        if index is None:
            raise Exception(
                "PBS_ARRAY_INDEX environment variable not defined.")
        return int(index)

    ### private methods

    def _launch(self, classname, funcname, hosts='all'):
        """ Submits a job array with qsub and returns the list of job ids
        """
        unix.mkdir(PATH.SYSTEM)

        nodes = math.ceil(PAR.NTASK / float(PAR.NODESIZE))
        ncpus = PAR.NPROC
        mpiprocs = PAR.NPROC

        hours, minutes = divmod(PAR.STEPTIME, 60)
        walltime = 'walltime=%02d:%02d:00 ' % (hours, minutes)

        # submit job; qsub's stdout is captured so the job id can be parsed.
        # The array range (-J), the output file (-o) and the wrapper script
        # all come from launch_args(), so they are not repeated here; emitting
        # them twice gave conflicting -J values when hosts == 'head'.
        with open(PATH.SYSTEM + '/' + 'job_id', 'w') as f:
            call('qsub '
                 + '%s ' % PAR.PBSARGS
                 + '-l select=%d:ncpus=%d:mpiprocs=%d '
                 % (nodes, ncpus, mpiprocs)
                 + '-l %s ' % walltime
                 + '-N %s ' % PAR.TITLE
                 + '-r y '
                 + '-j oe '
                 + '-V '
                 + self.launch_args(hosts)
                 + PATH.OUTPUT + ' '
                 + classname + ' '
                 + funcname + ' '
                 + findpath('seisflows.system'),
                 stdout=f)

        # retrieve job ids
        with open(PATH.SYSTEM + '/' + 'job_id', 'r') as f:
            line = f.readline()
            job = line.split()[-1].strip()

        if hosts == 'all' and PAR.NTASK > 1:
            nn = range(PAR.NTASK)
            # str.strip removes any of these characters from both ends
            job0 = job.strip('[].sdb')
            return [job0 + '[' + str(ii) + '].sdb' for ii in nn]
        else:
            return [job]

    def launch_args(self, hosts):
        """ Returns the array range, output file and wrapper script for qsub

          Raises KeyError if hosts is neither 'all' nor 'head'.
        """
        if hosts == 'all':
            arg = ('-J 0-%s ' % (PAR.NTASK - 1)
                   + '-o %s ' % (PATH.SUBMIT + '/' + 'output.pbs/'
                                 + '$PBS_ARRAYID')
                   + ' -- ' + findpath('seisflows.system') + '/'
                   + 'wrappers/run_pbsdsh ')

        elif hosts == 'head':
            arg = ('-J 0-0 '
                   + '-o %s ' % (PATH.SUBMIT + '/' + 'output.pbs/'
                                 + '$PBS_JOBID')
                   + ' -- ' + findpath('seisflows.system') + '/'
                   + 'wrappers/run_pbsdsh_head ')

        else:
            raise KeyError('Hosts parameter not set/recognized.')

        return arg

    def _status(self, classname, funcname, jobs):
        """ Determines completion status of one or more jobs

          Returns (isdone, jobs), where isdone is True only if every job is
          in state 'C'. Exits the process if any job is in state 'F'.
        """
        # accumulate over ALL jobs; resetting this list inside the loop would
        # let the last job alone decide completion
        states = []
        for job in jobs:
            state = self._query(job)
            if state in ['C']:
                states += [1]
            else:
                states += [0]
            if state in ['F']:
                print(msg.TaskError_PBS % (classname, funcname, job))
                sys.exit(-1)
        isdone = all(states)

        return isdone, jobs

    def _query(self, jobid):
        """ Queries job state from PBS database

          Writes the fifth column of the last line of 'qstat' output to
          PATH.SYSTEM/job_status and returns it stripped of whitespace.
        """
        # TODO: replace shell utilities with native Python
        with open(PATH.SYSTEM + '/' + 'job_status', 'w') as f:
            call('qstat -x -tJ ' + jobid + ' | '
                 + 'tail -n 1 ' + ' | '
                 + 'awk \'{print $5}\'',
                 stdout=f)

        with open(PATH.SYSTEM + '/' + 'job_status', 'r') as f:
            line = f.readline()
            state = line.strip()

        return state

    ### utility function

    def _timestamp(self):
        """ Appends the current wall-clock time to PATH.SYSTEM/timestamps
        """
        with open(PATH.SYSTEM + '/' + 'timestamps', 'a') as f:
            line = time.strftime('%H:%M:%S') + '\n'
            f.write(line)

    def save_kwargs(self, classname, funcname, kwargs):
        """ Saves keyword arguments under PATH.OUTPUT/SeisflowsObjects
        """
        kwargspath = join(PATH.OUTPUT, 'SeisflowsObjects',
                          classname + '_kwargs')
        kwargsfile = join(kwargspath, funcname + '.p')
        unix.mkdir(kwargspath)
        saveobj(kwargsfile, kwargs)
class mpi(custom_import('system', 'base')):
    """ An interface through which to submit workflows, run tasks in serial or
      parallel, and perform other system functions.

      By hiding environment details behind a python interface layer, these
      classes provide a consistent command set across different computing
      environments.

      For important additional information, please see
      http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration
    """

    def check(self):
        """ Checks parameters and paths, then verifies MPI dependencies
        """
        if 'TITLE' not in PAR:
            setattr(PAR, 'TITLE', basename(abspath('.')))

        if 'NTASK' not in PAR:
            setattr(PAR, 'NTASK', 1)

        if 'NPROC' not in PAR:
            setattr(PAR, 'NPROC', 1)

        if 'VERBOSE' not in PAR:
            setattr(PAR, 'VERBOSE', 1)

        if 'MPIEXEC' not in PAR:
            setattr(PAR, 'MPIEXEC', 'mpiexec')

        if 'MPIARGS' not in PAR:
            setattr(PAR, 'MPIARGS', '--mca mpi_warn_on_fork 0')

        # check paths
        if 'SCRATCH' not in PATH:
            setattr(PATH, 'SCRATCH', join(abspath('.'), 'scratch'))

        if 'LOCAL' not in PATH:
            setattr(PATH, 'LOCAL', '')

        if 'SUBMIT' not in PATH:
            setattr(PATH, 'SUBMIT', abspath('.'))

        if 'OUTPUT' not in PATH:
            setattr(PATH, 'OUTPUT', join(PATH.SUBMIT, 'output'))

        self.check_mpi()

    def submit(self, workflow):
        """ Submits job by running the workflow directly in this process
        """
        unix.mkdir(PATH.OUTPUT)
        unix.cd(PATH.OUTPUT)

        self.checkpoint()
        workflow.main()

    def run(self, classname, funcname, hosts='all', **kwargs):
        """ Runs tasks in serial or parallel on specified hosts

          Raises KeyError if hosts is neither 'all' nor 'head'.
        """
        # to avoid cryptic MPI messages, "--mca mpi_warn_on_fork 0" is the
        # default value for MPIARGS, and subprocess.call is used rather than
        # call_catch to invoke mpiexec
        self.checkpoint()
        self.save_kwargs(classname, funcname, kwargs)

        # the two modes differ only in process count and wrapper script
        if hosts == 'all':
            ntask, wrapper = PAR.NTASK, 'run_mpi'
        elif hosts == 'head':
            ntask, wrapper = 1, 'run_mpi_head'
        else:
            raise KeyError('Hosts parameter not set/recognized.')

        # the wrapper is named without a directory, so run from its folder
        unix.cd(join(findpath('seisflows.system'), 'wrappers'))
        subprocess.call(PAR.MPIEXEC + ' '
                        + '-n %d ' % ntask
                        + PAR.MPIARGS + ' '
                        + wrapper + ' '
                        + PATH.OUTPUT + ' '
                        + classname + ' '
                        + funcname,
                        shell=True)

    def getnode(self):
        """ Gets number of running task (the rank in MPI.COMM_WORLD)
        """
        from mpi4py import MPI
        return MPI.COMM_WORLD.Get_rank()

    def mpiexec(self):
        """ MPI executable used to invoke solver
        """
        # An empty string causes the solver to be invoked without an mpi
        # executable such as mpiexec or mpirun. Using an empty string here
        # presupposes that a simulation runs on a single core, which is
        # consistent with check_mpi() rejecting PAR.NPROC > 1. If you want to
        # carry out a workflow in which each simulation runs on multiple
        # cores, use a different system interface such as pbs_lg or slurm_lg.
        return ''

    def save_kwargs(self, classname, funcname, kwargs):
        """ Saves keyword arguments under PATH.OUTPUT/SeisflowsObjects
        """
        kwargspath = join(PATH.OUTPUT, 'SeisflowsObjects',
                          classname + '_kwargs')
        kwargsfile = join(kwargspath, funcname + '.p')
        unix.mkdir(kwargspath)
        saveobj(kwargsfile, kwargs)

    def check_mpi(self):
        """ Checks MPI dependencies

          Raises Exception if PAR.NPROC > 1, if mpi4py cannot be imported, or
          if PAR.MPIEXEC cannot be located with 'which'.
        """
        if PAR.NPROC > 1:
            raise Exception(mpiError1 % PAR.SYSTEM)

        try:
            import mpi4py
        except ImportError:
            raise Exception(mpiError2 % PAR.SYSTEM)

        # the context manager closes the file only if it was actually opened;
        # KeyboardInterrupt and SystemExit are deliberately not intercepted
        try:
            with open(os.devnull, 'w') as f:
                subprocess.check_call('which ' + PAR.MPIEXEC,
                                      shell=True,
                                      stdout=f)
        except Exception:
            raise Exception(mpiError3 % PAR.SYSTEM)
class slurm_md(custom_import('system', 'base')):
    """ An interface through which to submit workflows, run tasks in serial or
      parallel, and perform other system functions.

      By hiding environment details behind a python interface layer, these
      classes provide a consistent command set across different computing
      environments.

      Intermediate files are written to a global scratch path PATH.SCRATCH,
      which must be accessible to all compute nodes.

      Optionally, users can provide a local scratch path PATH.LOCAL if each
      compute node has its own local filesystem.

      For important additional information, please see
      http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration
    """

    def check(self):
        """ Fills in optional settings and insists on NPROC and NTASK
        """
        # parameters
        if 'TITLE' not in PAR:
            PAR.TITLE = basename(abspath('.'))

        if 'WALLTIME' not in PAR:
            PAR.WALLTIME = 30.

        if 'VERBOSE' not in PAR:
            PAR.VERBOSE = 1

        for required in ('NPROC', 'NTASK'):
            if required not in PAR:
                raise ParameterError(PAR, required)

        if 'SLURMARGS' not in PAR:
            PAR.SLURMARGS = ''

        # paths
        if 'SCRATCH' not in PATH:
            PATH.SCRATCH = join(abspath('.'), 'scratch')

        if 'LOCAL' not in PATH:
            PATH.LOCAL = None

        if 'SUBMIT' not in PATH:
            PATH.SUBMIT = abspath('.')

        if 'OUTPUT' not in PATH:
            PATH.OUTPUT = join(PATH.SUBMIT, 'output')

    def submit(self, workflow):
        """ Hands the workflow to sbatch through the 'submit' wrapper script
        """
        unix.mkdir(PATH.OUTPUT)
        unix.cd(PATH.OUTPUT)
        self.checkpoint()

        # pieces are listed in the order they appear on the command line
        command = ''.join([
            'sbatch ',
            '%s ' % PAR.SLURMARGS,
            '--job-name=%s ' % PAR.TITLE,
            '--output=%s ' % (PATH.SUBMIT + '/' + 'output.log'),
            '--cpus-per-task=%d ' % PAR.NPROC,
            '--ntasks=%d ' % PAR.NTASK,
            '--time=%d ' % PAR.WALLTIME,
            findpath('seisflows.system'), '/', 'wrappers/submit ',
            PATH.OUTPUT,
        ])
        call(command)

    def run(self, classname, funcname, hosts='all', **kwargs):
        """ Launches the task with srun on all hosts or on the head only

          Raises KeyError if hosts is neither 'all' nor 'head'.
        """
        self.checkpoint()
        self.save_kwargs(classname, funcname, kwargs)

        # only the wrapper script differs between the two modes
        if hosts == 'all':
            wrapper = 'wrappers/run '
        elif hosts == 'head':
            wrapper = 'wrappers/run_head '
        else:
            raise (KeyError('Hosts parameter not set/recognized.'))

        call('srun '
             + '--wait=0 '
             + join(findpath('seisflows.system'), wrapper)
             + PATH.OUTPUT + ' '
             + classname + ' '
             + funcname)

    def getnode(self):
        """ Looks up this task's id in SLURM_GTIDS using SLURM_LOCALID
        """
        task_ids = os.getenv('SLURM_GTIDS').split(',')
        local_index = int(os.getenv('SLURM_LOCALID'))
        return int(task_ids[local_index])

    def mpiexec(self):
        """ Returns the mpirun prefix, with PAR.NPROC processes, used to
          invoke the solver
        """
        return 'mpirun -np %d ' % PAR.NPROC

    def save_kwargs(self, classname, funcname, kwargs):
        """ Stores keyword arguments under PATH.OUTPUT/SeisflowsObjects
        """
        directory = join(PATH.OUTPUT, 'SeisflowsObjects',
                         classname + '_kwargs')
        filename = join(directory, funcname + '.p')
        unix.mkdir(directory)
        saveobj(filename, kwargs)
class specfem3d(custom_import('solver', 'base')):
    """ Python interface for SPECFEM3D

      See base class for method descriptions
    """

    def check(self):
        """ Checks parameters and paths

          Raises Exception if any of the time stepping parameters NT, DT or
          F0 is missing.
        """
        super(specfem3d, self).check()

        # check time stepping parameters
        if 'NT' not in PAR:
            raise Exception

        if 'DT' not in PAR:
            raise Exception

        if 'F0' not in PAR:
            raise Exception

    def generate_data(self, **model_kwargs):
        """ Generates data

          Builds the mesh, runs a forward simulation, and moves the resulting
          traces to 'traces/obs' before exporting them to PATH.OUTPUT.
        """
        self.generate_mesh(**model_kwargs)

        unix.cd(self.getpath)
        setpar('SIMULATION_TYPE', '1')
        setpar('SAVE_FORWARD', '.true.')
        self.call('bin/xspecfem3D')

        unix.mv(self.data_wildcard, 'traces/obs')
        self.export_traces(PATH.OUTPUT, 'traces/obs')

    def generate_mesh(self, model_path=None, model_name=None,
                      model_type='gll'):
        """ Performs meshing and database generation

          Only model_type 'gll' is supported; any other value raises
          NotImplementedError.
        """
        assert (model_name)
        assert (model_type)

        self.initialize_solver_directories()
        unix.cd(self.getpath)

        if model_type in ['gll']:
            par = getpar('MODEL').strip()
            if par != 'gll':
                # warn from a single task only, to avoid repeated output
                if self.getnode == 0:
                    print('WARNING: Unexpected Par_file setting:')
                    print('MODEL = %s' % par)

            assert (exists(model_path))
            self.check_mesh_properties(model_path)

            # copy every file of the model into the solver's database folder
            src = glob(model_path + '/' + '*')
            dst = self.model_databases
            unix.cp(src, dst)

            self.call('bin/xmeshfem3D')
            self.call('bin/xgenerate_databases')
            self.export_model(PATH.OUTPUT + '/' + model_name)

        else:
            raise NotImplementedError

    ### low-level solver interface

    def forward(self):
        """ Calls SPECFEM3D forward solver
        """
        setpar('SIMULATION_TYPE', '1')
        setpar('SAVE_FORWARD', '.true.')
        self.call('bin/xgenerate_databases')
        self.call('bin/xspecfem3D')

    def adjoint(self):
        """ Calls SPECFEM3D adjoint solver
        """
        setpar('SIMULATION_TYPE', '3')
        setpar('SAVE_FORWARD', '.false.')
        # 'SEM' is replaced by a link to the adjoint traces
        unix.rm('SEM')
        unix.ln('traces/adj', 'SEM')
        self.call('bin/xspecfem3D')

    ### input file writers

    def check_solver_parameter_files(self):
        """ Checks solver parameters

          Overwrites NSTEP and DT in the solver parameter file when they
          disagree with PAR.NT and PAR.DT. Raises NotImplementedError if
          MULTIPLES is set.
        """
        nt = getpar('NSTEP', cast=int)
        dt = getpar('DT', cast=float)

        if nt != PAR.NT:
            if self.getnode == 0:
                print("WARNING: nt != PAR.NT")
            setpar('NSTEP', PAR.NT)

        if dt != PAR.DT:
            if self.getnode == 0:
                print("WARNING: dt != PAR.DT")
            setpar('DT', PAR.DT)

        if self.mesh.nproc != PAR.NPROC:
            if self.getnode == 0:
                print('Warning: mesh.nproc != PAR.NPROC')

        if 'MULTIPLES' in PAR:
            raise NotImplementedError

    def initialize_adjoint_traces(self):
        """ Works around SPECFEM3D file format issue by overriding base method

          Falls back to writing all-zero adjoint traces for the 'x', 'y' and
          'z' channels when the base implementation fails. Raises Exception
          if the fallback fails as well.
        """
        # 'except Exception' rather than a bare 'except', so that
        # KeyboardInterrupt and SystemExit are not reported as a file format
        # problem
        try:
            super(specfem3d, self).initialize_adjoint_traces()
        except Exception:
            try:
                import preprocess
                path_obs = self.getpath + '/' + 'traces/obs'
                path_adj = self.getpath + '/' + 'traces/adj'

                # read observed data (only the header is needed)
                _, h = preprocess.reader(path_obs, preprocess.channels[0])
                zeros = np.zeros((h.nt, h.nr))

                # write adjoint traces
                for channel in ['x', 'y', 'z']:
                    preprocess.writer(zeros, h, path_adj, channel)
            except Exception:
                raise Exception(
                    'Seismic Unix format not supported for SPECFEM3D '
                    'inversions because SPECFEM3D lacks an adequate '
                    'parallel reader and writer'
                )

    def write_parameters(self):
        """ Writes the contents of PAR through solvertools
        """
        unix.cd(self.getpath)
        solvertools.write_parameters(vars(PAR))

    def write_receivers(self):
        """ Writes receiver information taken from the 'traces/obs' header
        """
        unix.cd(self.getpath)
        key = 'use_existing_STATIONS'
        val = '.true.'
        setpar(key, val)
        _, h = preprocess.load('traces/obs')
        solvertools.write_receivers(h.nr, h.rx, h.rz)

    def write_sources(self):
        """ Writes source information taken from PAR and the 'traces/obs'
          header
        """
        unix.cd(self.getpath)
        _, h = preprocess.load(dir='traces/obs')
        solvertools.write_sources(vars(PAR), h)

    ### miscellaneous

    @property
    def data_wildcard(self):
        """ List of files matching 'OUTPUT_FILES/*SU' in the current directory
        """
        return glob('OUTPUT_FILES/*SU')

    @property
    def kernel_databases(self):
        """ Directory holding kernel databases (same as model_databases)
        """
        return join(self.getpath, 'OUTPUT_FILES/DATABASES_MPI')

    @property
    def model_databases(self):
        """ Directory holding model databases
        """
        return join(self.getpath, 'OUTPUT_FILES/DATABASES_MPI')

    @property
    def source_prefix(self):
        """ File name prefix of source files
        """
        return 'FORCESOLUTION'
from seisflows.tools.config import custom_import
from seisflows.tools.config import ParameterError, SeisflowsParameters, SeisflowsPaths

PAR = SeisflowsParameters()
PATH = SeisflowsPaths()

# For users of tiger.princeton.edu, determines whether slurm_sm, slurm_md, or
# slurm_lg should be used. The selection happens at import time and the result
# is bound to the module-level name 'tiger'.

# ensure number of processors per forward simulation is defined
if 'NPROC' not in PAR:
    raise ParameterError(PAR, 'NPROC')

# there are 16 processors per node on tiger
if 'NODESIZE' in PAR:
    assert PAR.NODESIZE == 16, 'NODESIZE must be 16 on tiger'
else:
    PAR.NODESIZE = 16

# which system interface is appropriate?
if PAR.NPROC >= PAR.NODESIZE:
    # a simulation fills at least one whole node
    tiger = custom_import('system', 'tiger_lg')
elif PAR.NPROC > 1:
    # a simulation uses several cores but less than a node
    tiger = custom_import('system', 'tiger_md')
else:
    # single-core simulations
    tiger = custom_import('system', 'tiger_sm')
class tiger_md_gpu(custom_import('system', 'tiger_md')):
    """ An interface through which to submit workflows, run tasks in serial or
      parallel, and perform other system functions.

      By hiding environment details behind a python interface layer, these
      classes provide a consistent command set across different computing
      environments.

      For important additional information, please see
      http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration
    """

    def check(self):
        """ Checks parameters and paths

          Currently always raises NotImplementedError; the statements after
          the raise are unreachable and kept for reference only.
        """
        raise NotImplementedError('Provided by Etienne Bachmann. Not recently testested and not likely to work out of the box.')

        # why does Etienne have it this way?
        if 'NGPU' not in PAR:
            setattr(PAR, 'NGPU', 4)

        super(tiger_md_gpu, self).check()

    def submit(self, workflow):
        """ Submits workflow to sbatch, requesting PAR.NGPU GPUs on one node
        """
        unix.mkdir(PATH.OUTPUT)
        unix.cd(PATH.OUTPUT)

        self.checkpoint()

        # create the scratch link only once; later submissions reuse it
        if not exists(PATH.SUBMIT + '/' + 'scratch'):
            unix.ln(PATH.SCRATCH, PATH.SUBMIT + '/' + 'scratch')

        # NOTE(review): the job name is taken from PAR.SUBTITLE, not
        # PAR.TITLE -- confirm that parameter is defined for this system
        call('sbatch '
             + '--job-name=%s ' % PAR.SUBTITLE
             + '--output=%s ' % (PATH.SUBMIT + '/' + 'output.log')
             + '--nodes 1 '
             + '--ntasks=%d ' % PAR.NGPU
             + '--ntasks-per-socket=%d ' % PAR.NGPU
             + '--gres=gpu:%d ' % PAR.NGPU
             + '--time=%d ' % PAR.WALLTIME
             + findpath('seisflows.system') + '/' + 'wrappers/submit '
             + PATH.OUTPUT)

    def run(self, classname, funcname, hosts='all', **kwargs):
        """ Runs tasks in serial or parallel on specified hosts

          Raises KeyError if hosts is neither 'all' nor 'head'.
        """
        self.checkpoint()
        self.save_kwargs(classname, funcname, kwargs)

        if hosts == 'all':
            # run on all available nodes
            call('srun '
                 + '--wait=0 '
                 + join(findpath('seisflows.system'), 'wrappers/run ')
                 + PATH.OUTPUT + ' '
                 + classname + ' '
                 + funcname)

        elif hosts == 'head':
            # run on head node
            call('srun '
                 + '--wait=0 '
                 + join(findpath('seisflows.system'), 'wrappers/run_head ')
                 + PATH.OUTPUT + ' '
                 + classname + ' '
                 + funcname)

        else:
            # an unrecognized value must not silently skip the task
            raise KeyError('Hosts parameter not set/recognized.')

    def getnode(self):
        """ Gets number of running task

          Looks up this task's id in SLURM_GTIDS using SLURM_LOCALID.
        """
        gid = os.getenv('SLURM_GTIDS').split(',')
        lid = int(os.getenv('SLURM_LOCALID'))
        return int(gid[lid])

    def mpiexec(self):
        """ Specifies MPI executable, with PAR.NPROC processes, used to
          invoke solver
        """
        return 'mpirun -np %d ' % PAR.NPROC

    def save_kwargs(self, classname, funcname, kwargs):
        """ Saves keyword arguments under PATH.OUTPUT/SeisflowsObjects
        """
        kwargspath = join(PATH.OUTPUT, 'SeisflowsObjects',
                          classname + '_kwargs')
        kwargsfile = join(kwargspath, funcname + '.p')
        unix.mkdir(kwargspath)
        saveobj(kwargsfile, kwargs)
class regularize3d(custom_import('postprocess', 'base')):
    """ Adds regularization options to base class

      This parent class is only an abstract base class; "nabla" must be
      supplied by a child class (the original note names TIKHONOV1 and
      TOTAL_VARIATION) before regularization can be used.

      Prior to regularizing gradient, near field artifacts must be corrected.
      The "FIXRADIUS" parameter specifies the radius, in number of GLL points,
      within which the correction is applied.
    """

    def check(self):
        """ Checks parameters and paths """
        super(regularize3d, self).check()

        if 'FIXRADIUS' not in PAR:
            setattr(PAR, 'FIXRADIUS', 7.5)

        if 'LAMBDA' not in PAR:
            setattr(PAR, 'LAMBDA', 0.)

    def write_gradient(self, path):
        """ Writes the gradient via the base class, then saves the regularized
          gradient with backup='noregularize'
        """
        super(regularize3d, self).write_gradient(path)

        g = self.regularize3d(path)
        self.save(path, g, backup='noregularize')

    # modified by DmBorisov
    def process_kernels(self, path, parameters):
        """ Processes kernels in accordance with parameter settings

          Moves an existing 'sum' directory aside, masks the near field of
          every event kernel, combines the kernels and, when PAR.SMOOTH is
          positive, smooths the combined kernel.
        """
        fullpath = path + '/' + 'kernels'
        assert exists(path)

        if exists(fullpath + '/' + 'sum'):
            unix.mv(fullpath + '/' + 'sum', fullpath + '/' + 'sum_nofix')

        # mask sources and receivers
        system.run('postprocess', 'fix_near_field',
                   hosts='all',
                   path=fullpath)

        system.run('solver', 'combine',
                   hosts='head',
                   path=fullpath,
                   parameters=parameters)

        if PAR.SMOOTH > 0.:
            system.run('solver', 'smooth',
                       hosts='head',
                       path=path + '/' + 'kernels/sum',
                       span=PAR.SMOOTH,
                       parameters=parameters)

    # modified by DmBorisov
    def fix_near_field(self, path=''):
        """ Damps the kernel of the current source near the source location
          and applies coordinate tapers, then saves the kernel in place

          Does nothing when PAR.FIXRADIUS is zero or unset.
        """
        # nothing is modified or saved without a radius, so skip all I/O
        if not PAR.FIXRADIUS:
            return

        import preprocess
        preprocess.setup()

        name = solver.check_source_names()[solver.getnode]
        fullpath = path + '/' + name
        g = solver.load(fullpath, suffix='_kernel')
        nproc = solver.mesh.nproc

        x, y, z = self.getcoords()

        # estimate a representative point spacing from the mesh extent
        lx = x.max() - x.min()
        ly = y.max() - y.min()
        lz = z.max() - z.min()
        nn = x.size
        nx = np.around(np.sqrt(nn * lx / (lz * ly)))
        ny = np.around(np.sqrt(nn * ly / (lx * lz)))
        nz = np.around(np.sqrt(nn * lz / (lx * ly)))
        dx = lx / nx * 1.25
        dy = ly / ny * 1.25
        dz = lz / nz * 1.25

        sigma = PAR.FIXRADIUS * (dx + dz + dy) / 3.0
        _, h = preprocess.load(solver.getpath + '/' + 'traces/obs')

        # Gaussian centred on the first source position in the headers
        mask = np.exp(-0.5 * ((x - h.sx[0])**2. + (y - h.sy[0])**2. +
                              (z - h.sz[0])**2.) / sigma**2.)

        scale_z = np.power(abs(z), 0.5)

        # tapers built from the 10th power of each normalized coordinate
        # NOTE(review): the [::-1] reversals act on array order, not on
        # spatial position -- confirm this is intended for unstructured meshes
        power_win = 10
        win_x = np.power(x, power_win)
        win_y = np.power(y, power_win)
        win_z = np.power(z, power_win)

        win_x = win_x / win_x.max()
        win_y = win_y / win_y.max()
        win_z = win_z / win_z.max()

        win_x = 1.0 - win_x[::-1]
        win_y = 1.0 - win_y[::-1]
        win_z = 1.0 - win_z[::-1]

        # each taper is the window multiplied by its own reversal
        taper_x = win_x * win_x[::-1]
        taper_y = win_y * win_y[::-1]
        taper_z = win_z * win_z[::-1]

        scale_z = scale_z * taper_z + 0.1

        mask_x = solver.split(taper_x)
        mask_y = solver.split(taper_y)
        mask_z = solver.split(scale_z)
        mask_d = solver.split(mask)

        # the split masks are always read through their 'vp' entry
        for key in solver.parameters:
            for iproc in range(nproc):
                g[key][iproc] *= 1. - mask_d['vp'][iproc]
                g[key][iproc] *= mask_z['vp'][iproc]
                g[key][iproc] *= mask_x['vp'][iproc]
                g[key][iproc] *= mask_y['vp'][iproc]

        # receiver masking is not applied in this version

        solver.save(fullpath, g, suffix='_kernel')

    def regularize3d(self, path):
        """ Returns the merged gradient found under path, with
          PAR.LAMBDA * nabla(...) added to every parameter and processor slice
          when PAR.LAMBDA is nonzero
        """
        assert (exists(path))

        g = solver.load(path + '/' + 'gradient', suffix='_kernel')
        if not PAR.LAMBDA:
            return solver.merge(g)

        m = solver.load(path + '/' + 'model')
        mesh = self.getmesh()

        for key in solver.parameters:
            for iproc in range(PAR.NPROC):
                g[key][iproc] += PAR.LAMBDA *\
                    self.nabla(mesh, m[key][iproc], g[key][iproc])

        return solver.merge(g)

    def nabla(self, mesh, m, g):
        """ Regularization operator; must be implemented by subclass """
        raise NotImplementedError("Must be implemented by subclass.")

    # modified by DmBorisov
    def getcoords(self):
        """ Returns flat x, y, z arrays built by appending the 'x_loc',
          'y_loc' and 'z_loc' entries of the initial model for every processor
        """
        model_path = PATH.OUTPUT + '/' + 'model_init'

        model = solver.load_xyz(model_path)
        nproc = solver.mesh.nproc

        x = []
        y = []
        z = []
        for iproc in range(nproc):
            x = np.append(x, model['x_loc'][iproc])
            y = np.append(y, model['y_loc'][iproc])
            z = np.append(z, model['z_loc'][iproc])

        return np.array(x), np.array(y), np.array(z)


def tukeywin2(window_length, alpha):
    r'''Return a Tukey (tapered cosine) window with window_length samples.

    The window is flat in the middle and has a raised-cosine ramp covering
    the first and last alpha / 2 of its length.

    At \alpha <= 0 it is rectangular (all ones) and at \alpha >= 1 it is a
    Hann window.

    We use the same reference as MATLAB to provide the same results in case
    users compare a MATLAB output to this function output

    Reference
    ---------
    http://www.mathworks.com/access/helpdesk/help/toolbox/signal/tukeywin.html
    '''
    # Special cases
    if alpha <= 0:
        return np.ones(window_length)  # rectangular window
    elif alpha >= 1:
        return np.hanning(window_length)

    # Normal case
    x = np.linspace(0, 1, window_length)
    w = np.ones(x.shape)

    # first condition 0 <= x < alpha/2
    first_condition = x < alpha / 2
    w[first_condition] = 0.5 * (
        1 + np.cos(2 * np.pi / alpha * (x[first_condition] - alpha / 2)))

    # second condition already taken care of

    # third condition 1 - alpha / 2 <= x <= 1
    third_condition = x >= (1 - alpha / 2)
    w[third_condition] = 0.5 * (
        1 + np.cos(2 * np.pi / alpha * (x[third_condition] - 1 + alpha / 2)))

    return w
from seisflows.tools.config import custom_import
from seisflows.tools.config import ParameterError, SeisflowsParameters, SeisflowsPaths

PAR = SeisflowsParameters()
PATH = SeisflowsPaths()

# For users of tiger.princeton.edu, determines whether slurm_sm, slurm_md, or
# slurm_lg should be used.

# ensure number of processers per forward simulation is defined
if 'NPROC' not in PAR:
    # say which parameter is missing instead of raising a bare Exception
    raise ParameterError(PAR, 'NPROC')

# there are 16 processers per node on tiger
if 'NODESIZE' in PAR:
    assert PAR.NODESIZE == 16, \
        'NODESIZE must be 16 on tiger, got %r' % (PAR.NODESIZE,)
else:
    PAR.NODESIZE = 16

# which system interface is appropriate?
if PAR.NPROC >= PAR.NODESIZE:
    # a simulation needs at least one full node
    tiger = custom_import('system', 'tiger_lg')
elif PAR.NPROC > 1:
    # several processes, but fewer than a full node
    tiger = custom_import('system', 'tiger_md')
else:
    # single-process simulations
    tiger = custom_import('system', 'tiger_sm')
class slurm_lg(custom_import('system', 'base')):
    """ An interface through which to submit workflows, run tasks in serial or
      parallel, and perform other system functions.

      By hiding environment details behind a python interface layer, these
      classes provide a consistent command set across different computing
      environments.

      Intermediate files are written to a global scratch path PATH.SCRATCH,
      which must be accessible to all compute nodes.

      Optionally, users can provide a local scratch path PATH.LOCAL if each
      compute node has its own local filesystem.

      For important additional information, please see
      http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration
    """

    def check(self):
        """ Checks parameters and paths

          Fills in defaults for optional settings and raises ParameterError
          when NTASK, NPROC or NODESIZE is missing.
        """
        # check parameters
        if 'TITLE' not in PAR:
            setattr(PAR, 'TITLE', basename(abspath('.')))

        if 'WALLTIME' not in PAR:
            setattr(PAR, 'WALLTIME', 30.)

        if 'STEPTIME' not in PAR:
            setattr(PAR, 'STEPTIME', 30.)

        if 'SLEEPTIME' not in PAR:
            setattr(PAR, 'SLEEPTIME', 1.)

        if 'VERBOSE' not in PAR:
            setattr(PAR, 'VERBOSE', 1)

        if 'NTASK' not in PAR:
            raise ParameterError(PAR, 'NTASK')

        if 'NPROC' not in PAR:
            raise ParameterError(PAR, 'NPROC')

        if 'NODESIZE' not in PAR:
            raise ParameterError(PAR, 'NODESIZE')

        if 'SLURMARGS' not in PAR:
            setattr(PAR, 'SLURMARGS', '')

        # check paths
        if 'SCRATCH' not in PATH:
            setattr(PATH, 'SCRATCH', join(abspath('.'), 'scratch'))

        if 'LOCAL' not in PATH:
            setattr(PATH, 'LOCAL', None)

        if 'SUBMIT' not in PATH:
            setattr(PATH, 'SUBMIT', abspath('.'))

        if 'OUTPUT' not in PATH:
            setattr(PATH, 'OUTPUT', join(PATH.SUBMIT, 'output'))

        if 'SYSTEM' not in PATH:
            setattr(PATH, 'SYSTEM', join(PATH.SCRATCH, 'system'))

    def submit(self, workflow):
        """ Submits workflow

          Creates the output and log directories, checkpoints, and submits the
          'submit' wrapper through sbatch on one node.
        """
        unix.mkdir(PATH.OUTPUT)
        unix.cd(PATH.OUTPUT)
        unix.mkdir(PATH.SUBMIT + '/' + 'output.slurm')

        self.checkpoint()

        # prepare sbatch arguments
        call('sbatch '
             + '%s ' % PAR.SLURMARGS
             + '--job-name=%s ' % PAR.TITLE
             + '--output %s ' % (PATH.SUBMIT + '/' + 'output.log')
             + '--ntasks-per-node=%d ' % PAR.NODESIZE
             + '--nodes=%d ' % 1
             + '--time=%d ' % PAR.WALLTIME
             + findpath('seisflows.system') + '/' + 'wrappers/submit '
             + PATH.OUTPUT)

    def run(self, classname, funcname, hosts='all', **kwargs):
        """ Runs tasks in serial or parallel on specified hosts.

          Launches the job(s), then polls every 60*PAR.SLEEPTIME seconds until
          _status reports completion.
        """
        self.checkpoint()

        self.save_kwargs(classname, funcname, kwargs)
        jobs = self._launch(classname, funcname, hosts)
        while True:
            # wait before checking status
            time.sleep(60. * PAR.SLEEPTIME)
            self._timestamp()
            isdone, jobs = self._status(classname, funcname, jobs)
            if isdone:
                return

    def mpiexec(self):
        """ Specifies MPI exectuable; used to invoke solver """
        return 'srun '

    def getnode(self):
        """ Gets number of running task from SLURM_ARRAY_TASK_ID

          Raises Exception when the variable is unset or not an integer.
        """
        try:
            return int(os.getenv('SLURM_ARRAY_TASK_ID'))
        except (TypeError, ValueError):
            # TypeError: variable unset (None); ValueError: not an integer
            raise Exception("TASK_ID environment variable not defined.")

    ### private methods

    def _launch(self, classname, funcname, hosts='all'):
        """ Submits the 'run' wrapper through sbatch and returns the list of
          job ids to monitor (one per array task when hosts='all' and
          PAR.NTASK > 1)
        """
        unix.mkdir(PATH.SYSTEM)

        # sbatch's stdout is captured so the job id can be read back
        with open(PATH.SYSTEM + '/' + 'job_id', 'w') as f:
            call('sbatch '
                 + '%s ' % PAR.SLURMARGS
                 + '--job-name=%s ' % PAR.TITLE
                 + '--nodes=%d ' % math.ceil(PAR.NPROC / float(PAR.NODESIZE))
                 + '--ntasks-per-node=%d ' % PAR.NODESIZE
                 + '--ntasks=%d ' % PAR.NPROC
                 + '--time=%d ' % PAR.STEPTIME
                 + self._launch_args(hosts)
                 + findpath('seisflows.system') + '/' + 'wrappers/run '
                 + PATH.OUTPUT + ' '
                 + classname + ' '
                 + funcname + ' ',
                 stdout=f)

        # retrieve job ids (last whitespace-separated token of the first line)
        with open(PATH.SYSTEM + '/' + 'job_id', 'r') as f:
            line = f.readline()
            job = line.split()[-1].strip()
        if hosts == 'all' and PAR.NTASK > 1:
            return [job + '_' + str(ii) for ii in range(PAR.NTASK)]
        else:
            return [job]

    def _launch_args(self, hosts):
        """ Returns the '--array' and '--output' sbatch arguments for hosts

          Raises KeyError for an unrecognized hosts value.
        """
        # '%%50' yields the literal '%50' suffix; the previous '% %50'
        # spelling produced the same text in a much less obvious way
        if hosts == 'all':
            args = ('--array=%d-%d%%50 ' % (0, PAR.NTASK - 1)
                    + '--output %s ' % (PATH.SUBMIT + '/' + 'output.slurm/' + '%A_%a'))

        elif hosts == 'head':
            args = ('--array=%d-%d%%50 ' % (0, 0)
                    + '--output=%s ' % (PATH.SUBMIT + '/' + 'output.slurm/' + '%j'))

        else:
            # previously fell through to an UnboundLocalError on 'args'
            raise KeyError('Hosts parameter not set/recognized.')

        return args

    def _status(self, classname, funcname, jobs):
        """ Determines completion status of one or more jobs

          Returns (isdone, jobs), where isdone is True only if every job is
          COMPLETED. Prints an error and exits if any job has state FAILED,
          NODE_FAIL or TIMEOUT.
        """
        # accumulate over ALL jobs; resetting this list inside the loop made
        # completion depend on the last job only
        states = []
        for job in jobs:
            state = self._query(job)
            if state in ['COMPLETED']:
                states += [1]
            else:
                states += [0]
            if state in ['FAILED', 'NODE_FAIL', 'TIMEOUT']:
                print(msg.TaskError_SLURM % (classname, funcname, job))
                sys.exit(-1)
        isdone = all(states)

        return isdone, jobs

    def _query(self, jobid):
        """ Queries job state from SLURM database

          Writes the output of sacct to PATH.SYSTEM/job_status and returns its
          first line, stripped.
        """
        with open(PATH.SYSTEM + '/' + 'job_status', 'w') as f:
            call('sacct -n -o state -j ' + jobid, stdout=f)

        with open(PATH.SYSTEM + '/' + 'job_status', 'r') as f:
            line = f.readline()
            state = line.strip()

        return state

    ### utility function

    def _timestamp(self):
        """ Appends the current HH:MM:SS time to PATH.SYSTEM/timestamps """
        with open(PATH.SYSTEM + '/' + 'timestamps', 'a') as f:
            line = time.strftime('%H:%M:%S') + '\n'
            f.write(line)

    def save_kwargs(self, classname, funcname, kwargs):
        """ Saves keyword arguments to
          PATH.OUTPUT/SeisflowsObjects/<classname>_kwargs/<funcname>.p
        """
        kwargspath = join(PATH.OUTPUT, 'SeisflowsObjects', classname + '_kwargs')
        kwargsfile = join(kwargspath, funcname + '.p')
        unix.mkdir(kwargspath)
        saveobj(kwargsfile, kwargs)
class specfem2d(custom_import('solver', 'base')):
    """ Python interface for SPECFEM2D

      See base class for method descriptions
    """

    # legacy acoustic material setting uses 'vs' as its only parameter
    if PAR.MATERIALS == 'LegacyAcoustic':
        parameters = ['vs']

    def check(self):
        """ Checks parameters and paths """
        super(specfem2d, self).check()

        # time stepping parameters are mandatory
        for required in ('NT', 'DT', 'F0'):
            if required not in PAR:
                raise Exception

    def check_solver_parameter_files(self):
        """ Checks solver parameters

          The leading bare return makes the remainder of the body unreachable,
          so this method currently does nothing.
        """
        return

        nt = getpar('nt', cast=int)
        dt = getpar('deltat', cast=float)
        f0 = getpar('f0', file='DATA/SOURCE', cast=float)

        if nt != PAR.NT:
            if self.getnode == 0:
                print("WARNING: nt != PAR.NT")
            setpar('nt', PAR.NT)

        if dt != PAR.DT:
            if self.getnode == 0:
                print("WARNING: dt != PAR.DT")
            setpar('deltat', PAR.DT)

        if f0 != PAR.F0:
            if self.getnode == 0:
                print("WARNING: f0 != PAR.F0")
            setpar('f0', PAR.F0, file='DATA/SOURCE')

        if self.mesh.nproc != PAR.NPROC and self.getnode == 0:
            print('Warning: mesh.nproc != PAR.NPROC')

        if 'MULTIPLES' in PAR:
            setpar('absorbtop', '.false.' if PAR.MULTIPLES else '.true.')

    def generate_data(self, **model_kwargs):
        """ Generates data

          Builds the mesh, runs mesher and solver as a forward simulation, and
          moves and exports the resulting traces as observations.
        """
        self.generate_mesh(**model_kwargs)

        unix.cd(self.getpath)
        for name, value in (('SIMULATION_TYPE', '1'),
                            ('SAVE_FORWARD', '.true.')):
            setpar(name, value)

        self.call('bin/xmeshfem2D')
        self.call('bin/xspecfem2D', output='log.solver')

        unix.mv(self.data_wildcard, 'traces/obs')
        self.export_traces(PATH.OUTPUT, 'traces/obs')

    def generate_mesh(self, model_path=None, model_name=None, model_type='gll'):
        """ Performs meshing and database generation

          Copies every file under model_path into the solver DATA directory
          and exports the model to PATH.OUTPUT/<model_name>.
        """
        assert (model_name)
        assert (model_type)

        self.initialize_solver_directories()
        unix.cd(self.getpath)

        assert (exists(model_path))
        self.check_mesh_properties(model_path)

        model_files = glob(join(model_path, '*'))
        unix.cp(model_files, join(self.getpath, 'DATA'))

        self.export_model('/'.join([PATH.OUTPUT, model_name]))

    ### low-level solver interface

    def forward(self):
        """ Calls SPECFEM2D forward solver """
        for name, value in (('SIMULATION_TYPE', '1'),
                            ('SAVE_FORWARD', '.true.')):
            setpar(name, value)

        for executable in ('bin/xmeshfem2D', 'bin/xspecfem2D'):
            self.call(executable)

    def adjoint(self):
        """ Calls SPECFEM2D adjoint solver """
        for name, value in (('SIMULATION_TYPE', '3'),
                            ('SAVE_FORWARD', '.false.')):
            setpar(name, value)

        # point SEM at the adjoint traces
        unix.rm('SEM')
        unix.ln('traces/adj', 'SEM')

        for executable in ('bin/xmeshfem2D', 'bin/xspecfem2D'):
            self.call(executable)

    ### postprocessing utilities

    def smooth(self, path='', parameters='dummy', span=0.):
        """ Smooths SPECFEM2D kernels by convolving them with a Gaussian

          Returns the loaded kernels unchanged when span is zero. Otherwise
          only slice 0 of each parameter is smoothed, the original directory
          is kept as <path>_nosmooth, and the result is saved to path.
        """
        from seisflows.tools.array import meshsmooth, stack

        loaded = self.load(path, suffix='_kernel')
        if not span:
            return loaded

        # set up grid from the initial model coordinates
        _, xs = loadbypar(PATH.MODEL_INIT, ['x'], 0)
        _, zs = loadbypar(PATH.MODEL_INIT, ['z'], 0)
        grid = stack(xs[0], zs[0])

        for name in self.parameters:
            smoothed = meshsmooth(loaded[name][0], grid, span)
            loaded[name] = [smoothed]

        backup = path + '_nosmooth'
        unix.rm(backup)
        unix.mv(path, backup)
        self.save(path, loaded, suffix='_kernel')

    ### file transfer utilities

    def import_model(self, path):
        """ Copies <path>/model/* into the solver DATA directory """
        unix.cp(glob(path + '/' + 'model/*'), join(self.getpath, 'DATA/'))

    def export_model(self, path):
        """ Copies DATA/*.bin to path; only done when getnode is 0 """
        if self.getnode != 0:
            return

        unix.mkdir(path)
        binaries = glob(join(self.getpath, 'DATA/*.bin'))
        unix.cp(binaries, path)

    ### input file writers

    def write_parameters(self):
        """ Writes solver parameters from PAR """
        unix.cd(self.getpath)
        solvertools.write_parameters(vars(PAR))

    def write_receivers(self):
        """ Writes receivers using headers loaded from traces/obs """
        unix.cd(self.getpath)
        setpar('use_existing_STATIONS', '.true.')

        _, headers = preprocess.load('traces/obs')
        solvertools.write_receivers(headers.nr, headers.rx, headers.rz)

    def write_sources(self):
        """ Writes sources using headers loaded from traces/obs """
        unix.cd(self.getpath)

        _, headers = preprocess.load(dir='traces/obs')
        solvertools.write_sources(vars(PAR), headers)

    ### miscellaneous

    @property
    def data_wildcard(self):
        """ List of files matching the single-file SU output pattern """
        pattern = 'OUTPUT_FILES/U?_file_single.su'
        return glob(pattern)

    @property
    def model_databases(self):
        """ Directory holding model files """
        return join(self.getpath, 'DATA')

    @property
    def kernel_databases(self):
        """ Directory holding kernel files """
        return join(self.getpath, 'OUTPUT_FILES')

    @property
    def source_prefix(self):
        """ Prefix of source files """
        return 'SOURCE'
class multithreaded(custom_import('system', 'serial')):
    """ An interface through which to submit workflows, run tasks in serial or
      parallel, and perform other system functions.

      By hiding environment details behind a python interface layer, these
      classes provide a consistent command set across different computing
      environments.

      Tasks are launched as subprocesses, with at most PAR.NPROCMAX running
      at any one time.

      For important additional information, please see
      http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration
    """

    def check(self):
        """ Checks parameters and paths

          Raises Exception if PAR.NPROCMAX is not defined.
        """
        super(multithreaded, self).check()

        if 'NPROCMAX' not in PAR:
            raise Exception

    def run(self, classname, funcname, hosts='all', **kwargs):
        """ Runs tasks in serial or parallel on specified hosts

          hosts='all' runs PAR.NTASK tasks through a work queue;
          hosts='head' calls the function in-process as task 0;
          anything else raises KeyError.
        """
        unix.mkdir(PATH.SYSTEM)

        self.checkpoint()
        self.save_kwargs(classname, funcname, kwargs)

        if hosts == 'all':
            running_tasks = dict()
            # explicit list so that pop(0) does not depend on range()
            # returning a list
            queued_tasks = list(range(PAR.NTASK))

            # implements "work queue" pattern
            while queued_tasks or running_tasks:

                # launch queued tasks
                while queued_tasks and len(running_tasks) < PAR.NPROCMAX:
                    i = queued_tasks.pop(0)
                    p = self._launch(classname, funcname, itask=i)
                    running_tasks[i] = p

                # checks status of running tasks; iterate over a snapshot
                # because finished entries are removed inside the loop
                for i, p in list(running_tasks.items()):
                    if p.poll() is not None:
                        running_tasks.pop(i)

                if running_tasks:
                    sleep(1)

            print('')

        elif hosts == 'head':
            self.setnode(0)
            func = getattr(__import__(classname), funcname)
            func(**kwargs)

        else:
            raise (KeyError('Hosts parameter not set/recognized.'))

    ### private methods

    def _launch(self, classname, funcname, itask=0):
        """ Starts the 'run' wrapper for one task in a subprocess whose
          environment carries SEISFLOWS_TASKID, and returns the Popen handle
        """
        self.progress(itask)

        # copy of the current environment plus the task id
        env = dict(os.environ, SEISFLOWS_TASKID=str(itask))

        p = Popen(
            findpath('seisflows.system') + '/' + 'wrappers/run '
            + PATH.OUTPUT + ' '
            + classname + ' '
            + funcname,
            shell=True,
            env=env)

        return p

    def save_kwargs(self, classname, funcname, kwargs):
        """ Saves keyword arguments to
          PATH.OUTPUT/SeisflowsObjects/<classname>_kwargs/<funcname>.p
        """
        kwargspath = join(PATH.OUTPUT, 'SeisflowsObjects',
                          classname + '_kwargs')
        kwargsfile = join(kwargspath, funcname + '.p')
        unix.mkdir(kwargspath)
        saveobj(kwargsfile, kwargs)
class serial(custom_import('system', 'base')):
    """ An interface through which to submit workflows, run tasks in serial or
      parallel, and perform other system functions.

      By hiding environment details behind a python interface layer, these
      classes provide a consistent command set across different computing
      environments.

      For important additional information, please see
      http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration
    """

    def check(self):
        """ Checks parameters and paths, filling in defaults where unset """

        # check parameters
        if 'TITLE' not in PAR:
            setattr(PAR, 'TITLE', basename(abspath('.')))

        if 'NTASK' not in PAR:
            setattr(PAR, 'NTASK', 1)

        if 'NPROC' not in PAR:
            setattr(PAR, 'NPROC', 1)

        if 'VERBOSE' not in PAR:
            setattr(PAR, 'VERBOSE', 1)

        # check paths
        if 'SCRATCH' not in PATH:
            setattr(PATH, 'SCRATCH', join(abspath('.'), 'scratch'))

        if 'LOCAL' not in PATH:
            setattr(PATH, 'LOCAL', '')

        if 'SUBMIT' not in PATH:
            setattr(PATH, 'SUBMIT', abspath('.'))

        if 'OUTPUT' not in PATH:
            setattr(PATH, 'OUTPUT', join(PATH.SUBMIT, 'output'))

        if 'SYSTEM' not in PATH:
            setattr(PATH, 'SYSTEM', join(PATH.SCRATCH, 'system'))

    def submit(self, workflow):
        """ Submits job

          Creates the output directory, checkpoints, and runs workflow.main()
          directly in this process.
        """
        unix.mkdir(PATH.OUTPUT)
        unix.cd(PATH.OUTPUT)

        self.checkpoint()
        workflow.main()

    def run(self, classname, funcname, hosts='all', **kwargs):
        """ Runs tasks in serial or parallel on specified hosts

          hosts='all' calls the function once per task, one after another;
          hosts='head' calls it once as task 0;
          anything else raises KeyError.
        """
        unix.mkdir(PATH.SYSTEM)

        if hosts == 'all':
            for itask in range(PAR.NTASK):
                self.setnode(itask)
                self.progress(itask)
                func = getattr(__import__(classname), funcname)
                func(**kwargs)
            print('')

        elif hosts == 'head':
            self.setnode(0)
            func = getattr(__import__(classname), funcname)
            func(**kwargs)

        else:
            # this branch used to call an undefined name ('task') and so
            # failed with NameError; raise the explicit error instead
            raise KeyError('Hosts parameter not set/recognized.')

    def getnode(self):
        """ Gets number of running task """
        return int(os.environ['SEISFLOWS_TASKID'])

    def setnode(self, itask):
        """ Sets number of running task """
        os.environ['SEISFLOWS_TASKID'] = str(itask)

    def mpiexec(self):
        """ Specifies MPI exectuable; used to invoke solver

          Returns an empty string for single-process runs.
        """
        if PAR.NPROC > 1:
            return 'mpiexec -np %d ' % PAR.NPROC
        else:
            return ''

    def progress(self, itask=None):
        """ Provides status updates

          Prints a 1-based "task i of N" line when PAR.VERBOSE is set and
          there is more than one task.
        """
        if PAR.VERBOSE and PAR.NTASK > 1:
            print(' task ' + '%02d' % (itask + 1) + ' of ' + '%02d' % PAR.NTASK)
class specfem3d(custom_import('solver', 'base')):
    """ Python interface for SPECFEM3D

      See base class for method descriptions
    """

    def check(self):
        """ Checks parameters and paths

          Raises Exception if NT, DT, F0 or FORMAT is missing, or if FORMAT is
          not 'su'.
        """
        super(specfem3d, self).check()

        # check time stepping parameters
        if 'NT' not in PAR:
            raise Exception

        if 'DT' not in PAR:
            raise Exception

        if 'F0' not in PAR:
            raise Exception

        # check data format
        if 'FORMAT' not in PAR:
            raise Exception()

        if PAR.FORMAT != 'su':
            raise Exception()

    def generate_data(self, **model_kwargs):
        """ Generates data

          Builds the mesh, runs a forward simulation, and moves the SU output
          files to traces/obs.
        """
        self.generate_mesh(**model_kwargs)

        unix.cd(self.getpath)
        setpar('SIMULATION_TYPE', '1')
        setpar('SAVE_FORWARD', '.true.')
        call_solver(system.mpiexec(), 'bin/xspecfem3D')

        if PAR.FORMAT in ['SU', 'su']:
            src = glob('OUTPUT_FILES/*_d?_SU')
            dst = 'traces/obs'
            unix.mv(src, dst)

    def generate_mesh(self, model_path=None, model_name=None, model_type='gll'):
        """ Performs meshing and database generation

          Only model_type 'gll' is supported; any other value raises
          NotImplementedError.
        """
        assert(model_name)
        assert(model_type)

        self.initialize_solver_directories()
        unix.cd(self.getpath)

        if model_type in ['gll']:
            par = getpar('MODEL').strip()
            if par != 'gll':
                if self.getnode == 0:
                    print('WARNING: Unexpected Par_file setting:')
                    print('MODEL = %s' % (par,))

            assert(exists(model_path))
            self.check_mesh_properties(model_path)

            src = glob(model_path + '/' + '*')
            dst = self.model_databases
            unix.cp(src, dst)

            call_solver(system.mpiexec(), 'bin/xmeshfem3D')
            call_solver(system.mpiexec(), 'bin/xgenerate_databases')
            self.export_model(PATH.OUTPUT + '/' + model_name)

        else:
            raise NotImplementedError

    ### low-level solver interface

    def forward(self, path='traces/syn'):
        """ Calls SPECFEM3D forward solver and moves the SU output to path """
        setpar('SIMULATION_TYPE', '1')
        setpar('SAVE_FORWARD', '.true.')
        call_solver(system.mpiexec(), 'bin/xgenerate_databases')
        call_solver(system.mpiexec(), 'bin/xspecfem3D')

        if PAR.FORMAT in ['SU', 'su']:
            src = glob('OUTPUT_FILES/*_d?_SU')
            dst = path
            unix.mv(src, dst)

    def adjoint(self):
        """ Calls SPECFEM3D adjoint solver """
        setpar('SIMULATION_TYPE', '3')
        setpar('SAVE_FORWARD', '.false.')
        unix.rm('SEM')
        unix.ln('traces/adj', 'SEM')
        call_solver(system.mpiexec(), 'bin/xspecfem3D')

        # work around SPECFEM3D conflicting name conventions
        self.rename_data()

    ### input file writers

    def check_solver_parameter_files(self):
        """ Checks solver parameters

          Overwrites NSTEP and DT in the solver parameter file when they
          differ from PAR, warning on task 0. Raises NotImplementedError if
          'MULTIPLES' is present in PAR.
        """
        nt = getpar('NSTEP', cast=int)
        dt = getpar('DT', cast=float)

        if nt != PAR.NT:
            if self.getnode == 0:
                print("WARNING: nt != PAR.NT")
            setpar('NSTEP', PAR.NT)

        if dt != PAR.DT:
            if self.getnode == 0:
                print("WARNING: dt != PAR.DT")
            setpar('DT', PAR.DT)

        if self.mesh_properties.nproc != PAR.NPROC:
            if self.getnode == 0:
                print('Warning: mesh_properties.nproc != PAR.NPROC')

        if 'MULTIPLES' in PAR:
            raise NotImplementedError

    # the source contained two consecutive 'def' headers for this method;
    # they are merged here into one definition carrying the docstring
    def initialize_adjoint_traces(self):
        """ Works around SPECFEM3D file format issue by overriding base method
        """
        super(specfem3d, self).initialize_adjoint_traces()

        # workaround for SPECFEM2D's use of different name conventions for
        # regular traces and 'adjoint' traces
        if PAR.FORMAT in ['SU', 'su']:
            files = glob(self.getpath + '/' + 'traces/adj/*SU')
            unix.rename('_SU', '_SU.adj', files)

        # workaround for SPECFEM3D's requirement that all components exist,
        # even ones not in use
        unix.cd(self.getpath + '/' + 'traces/adj')
        for iproc in range(PAR.NPROC):
            for channel in ['x', 'y', 'z']:
                src = '%d_d%s_SU.adj' % (iproc, PAR.CHANNELS[0])
                dst = '%d_d%s_SU.adj' % (iproc, channel)
                if not exists(dst):
                    unix.cp(src, dst)

    def rename_data(self):
        """ Works around conflicting data filename conventions """
        if PAR.FORMAT in ['SU', 'su']:
            files = glob(self.getpath + '/' + 'traces/adj/*SU')
            unix.rename('_SU', '_SU.adj', files)

    def write_parameters(self):
        """ Writes solver parameters from PAR """
        unix.cd(self.getpath)
        solvertools.write_parameters(vars(PAR))

    def write_receivers(self):
        """ Writes receivers using headers loaded from traces/obs """
        unix.cd(self.getpath)
        key = 'use_existing_STATIONS'
        val = '.true.'
        setpar(key, val)

        _, h = preprocess.load('traces/obs')
        solvertools.write_receivers(h.nr, h.rx, h.rz)

    def write_sources(self):
        """ Writes sources using headers loaded from traces/obs """
        unix.cd(self.getpath)
        _, h = preprocess.load(dir='traces/obs')
        solvertools.write_sources(vars(PAR), h)

    ### miscellaneous

    @property
    def data_wildcard(self):
        """ Glob pattern matching SU files of the configured channels """
        channels = PAR.CHANNELS
        return '*_d[%s]_SU' % channels.lower()

    @property
    def data_filenames(self):
        """ List of SU data filenames

          With PAR.CHANNELS set, names are generated for every channel and
          processor. Otherwise the files present in traces/obs are listed.
        """
        if PAR.CHANNELS:
            if PAR.FORMAT in ['SU', 'su']:
                filenames = []
                for channel in PAR.CHANNELS:
                    for iproc in range(PAR.NPROC):
                        filenames += ['%d_d%s_SU' % (iproc, channel)]
                return filenames

        else:
            unix.cd(self.getpath)
            unix.cd('traces/obs')

            if PAR.FORMAT in ['SU', 'su']:
                # no channel list to substitute here, so match any single
                # component character, as forward() and generate_data() do;
                # the old pattern contained an unformatted literal '%s'
                return glob('*_d?_SU')

    @property
    def kernel_databases(self):
        """ Directory holding kernel files """
        return join(self.getpath, 'OUTPUT_FILES/DATABASES_MPI')

    @property
    def model_databases(self):
        """ Directory holding model files """
        return join(self.getpath, 'OUTPUT_FILES/DATABASES_MPI')

    @property
    def source_prefix(self):
        """ Prefix of source files """
        return 'FORCESOLUTION'
class thrifty_inversion(custom_import('workflow', 'inversion')):
    """ Thrifty inversion subclass

      Provides savings over conventional inversion by avoiding redundant
      forward simulations associated with sufficient decrease and curvature
      tests in a safeguarded backtracking line search.

      The results of 'inversion' and 'thrifty_inversion' should be exactly the
      same. Users who prefer a simpler but less efficient workflow can choose
      'inversion'. Users who prefer a more efficient but more complicated
      workflow can choose 'thrifty_inversion.'
    """

    def solver_status(self, maxiter=1):
        """ Keeps track of whether a forward simulation would be redundant

          Returns True only if none of the conditions below applies.
        """
        simulation_needed = (
            # solver files do not exist prior to first iteration
            optimize.iter <= maxiter
            # solver files need to be reinstated after possible multiscale
            # transition
            or optimize.iter == PAR.BEGIN
            # solver files need to be reinstated on local filesystems
            or PATH.LOCAL
            # thrifty inversion only implemented for backtracking line search,
            # not bracketing line search
            or PAR.LINESEARCH != 'Backtrack'
            # optimization algorithm was restarted
            or optimize.restarted)

        return not simulation_needed

    def setup(self):
        """ Lays groundwork for inversion """
        # clean scratch directories
        if PAR.BEGIN == 1:
            unix.rm(PATH.SCRATCH)
            unix.mkdir(PATH.SCRATCH)

            preprocess.setup()
            postprocess.setup()
            optimize.setup()

        if self.solver_status():
            return

        print('Copying data' if PATH.DATA else 'Generating data')
        system.run('solver', 'setup', hosts='all')

    def initialize(self):
        """ Prepares for gradient evaluation unless the prerequisites are
          already in place
        """
        if self.solver_status(maxiter=2):
            return

        super(thrifty_inversion, self).initialize()

    def iterate_search(self):
        """ Runs the parent line search step, then keeps the solver files of
          the 'best' trial model if they can be reused
        """
        super(thrifty_inversion, self).iterate_search()

        search_finished = optimize.isdone
        reusable = self.solver_status()

        # to avoid redundant forward simulation, save solver files associated
        # with 'best' trial model
        if reusable and search_finished:
            best = PATH.SOLVER + '_best'
            unix.rm(best)
            unix.mv(PATH.SOLVER, best)

    def clean(self):
        """ Reuses the function-evaluation and 'best' solver directories when
          possible; otherwise defers to the parent class
        """
        if not self.solver_status():
            super(thrifty_inversion, self).clean()
            return

        unix.rm(PATH.GRAD)
        unix.mv(PATH.FUNC, PATH.GRAD)
        unix.mkdir(PATH.FUNC)
        unix.rm(PATH.SOLVER)
        unix.mv(PATH.SOLVER + '_best', PATH.SOLVER)
import numpy as np from seisflows.tools import unix from seisflows.tools.array import loadnpy, savenpy from seisflows.tools.code import exists from seisflows.tools.config import SeisflowsParameters, SeisflowsPaths, \ custom_import, ParameterError PAR = SeisflowsParameters() PATH = SeisflowsPaths() import solver import postprocess migration = custom_import('workflow', 'migration')() # modified by DmBorisov class test_postprocess(object): """ Postprocessing class """ def check(self): """ Checks parameters and paths """ migration.check() if 'INPUT' not in PATH: setattr(PATH, 'INPUT', None) def main(self): """ Writes gradient of objective function
class regularize(custom_import('postprocess', 'base')):
    """ Adds regularization options to base class

      This parent class is only an abstract base class; "nabla" must be
      supplied by a child class (the original note names TIKHONOV1 and
      TOTAL_VARIATION) before regularization can be used.

      Prior to regularizing gradient, near field artifacts must be corrected.
      The "FIXRADIUS" parameter specifies the radius, in number of GLL points,
      within which the correction is applied.
    """

    def check(self):
        """ Checks parameters and paths """
        super(regularize, self).check()

        if 'FIXRADIUS' not in PAR:
            setattr(PAR, 'FIXRADIUS', 7.5)

        if 'LAMBDA' not in PAR:
            setattr(PAR, 'LAMBDA', 0.)

    def write_gradient(self, path):
        """ Writes the gradient via the base class, then saves the regularized
          gradient with backup='noregularize'
        """
        super(regularize, self).write_gradient(path)

        g = self.regularize(path)
        self.save(path, g, backup='noregularize')

    def process_kernels(self, path, parameters):
        """ Processes kernels in accordance with parameter settings

          Masks the near field of every event kernel, optionally smooths
          'kernels/sum', and combines the kernels.
        """
        fullpath = path + '/' + 'kernels'
        assert exists(path)

        # mask sources and receivers
        system.run('postprocess', 'fix_near_field',
                   hosts='all',
                   path=fullpath)

        # NOTE(review): smoothing runs on 'kernels/sum' BEFORE 'combine' is
        # called -- confirm this order is intended
        if PAR.SMOOTH > 0.:
            system.run('solver', 'smooth',
                       hosts='head',
                       path=path + '/' + 'kernels/sum',
                       span=PAR.SMOOTH,
                       parameters=parameters)

        system.run('solver', 'combine',
                   hosts='head',
                   path=fullpath,
                   parameters=parameters)

    def fix_near_field(self, path=''):
        """ Damps the kernel of the current source near the source location
          and scales it by sqrt(|z|), then saves the kernel in place

          Does nothing when PAR.FIXRADIUS is zero or unset.
        """
        # nothing is modified or saved without a radius, so skip all I/O
        if not PAR.FIXRADIUS:
            return

        import preprocess
        preprocess.setup()

        name = solver.check_source_names()[solver.getnode]
        fullpath = path + '/' + name
        g = solver.load(fullpath, suffix='_kernel')
        nproc = solver.mesh.nproc

        x, y, z = self.getcoords()

        # estimate a representative point spacing from the mesh extent
        lx = x.max() - x.min()
        ly = y.max() - y.min()
        lz = z.max() - z.min()
        nn = x.size
        nx = np.around(np.sqrt(nn * lx / (lz * ly)))
        ny = np.around(np.sqrt(nn * ly / (lx * lz)))
        nz = np.around(np.sqrt(nn * lz / (lx * ly)))
        dx = lx / nx * 1.25
        dy = ly / ny * 1.25
        dz = lz / nz * 1.25

        sigma = PAR.FIXRADIUS * (dx + dz + dy) / 3.0
        _, h = preprocess.load(solver.getpath + '/' + 'traces/obs')

        # mask sources: Gaussian centred on the first source position
        mask = np.exp(-0.5 * ((x - h.sx[0])**2. + (y - h.sy[0])**2. +
                              (z - h.sz[0])**2.) / sigma**2.)

        # mask top: scale factor sqrt(|z|)
        z_factor = np.power(abs(z), 0.5)

        mask_depth = solver.split(z_factor)
        mask_d = solver.split(mask)

        # the split masks are always read through their 'vp' entry
        for key in solver.parameters:
            for iproc in range(nproc):
                g[key][iproc] *= 1. - mask_d['vp'][iproc]
                g[key][iproc] *= mask_depth['vp'][iproc]

        # receiver masking is not applied in this version

        solver.save(fullpath, g, suffix='_kernel')

    def regularize(self, path):
        """ Returns the merged gradient found under path, with
          PAR.LAMBDA * nabla(...) added to every parameter and processor slice
          when PAR.LAMBDA is nonzero
        """
        assert (exists(path))

        g = solver.load(path + '/' + 'gradient', suffix='_kernel')
        if not PAR.LAMBDA:
            return solver.merge(g)

        m = solver.load(path + '/' + 'model')
        mesh = self.getmesh()

        for key in solver.parameters:
            for iproc in range(PAR.NPROC):
                g[key][iproc] += PAR.LAMBDA *\
                    self.nabla(mesh, m[key][iproc], g[key][iproc])

        return solver.merge(g)

    def nabla(self, mesh, m, g):
        """ Regularization operator; must be implemented by subclass """
        raise NotImplementedError("Must be implemented by subclass.")

    def getcoords(self):
        """ Returns flat x, y, z arrays built by appending the 'x_loc',
          'y_loc' and 'z_loc' entries of the initial model for every processor
        """
        model_path = PATH.OUTPUT + '/' + 'model_init'

        model = solver.load_xyz(model_path)
        nproc = solver.mesh.nproc

        x = []
        y = []
        z = []
        for iproc in range(nproc):
            x = np.append(x, model['x_loc'][iproc])
            y = np.append(y, model['y_loc'][iproc])
            z = np.append(z, model['z_loc'][iproc])

        return np.array(x), np.array(y), np.array(z)
class regularize(custom_import('postprocess', 'base')):
    """ Adds regularization options to base class

      This parent class is only an abstract base class; it leaves "nabla"
      unimplemented. See child classes such as TIKHONOV1 and TOTAL_VARIATION
      for usable regularization.

      Prior to regularizing gradient, near field artifacts must be corrected.
      The "FIXRADIUS" parameter specifies the radius, in number of GLL points,
      within which the correction is applied.
    """

    def check(self):
        """ Checks parameters and paths
        """
        super(regularize, self).check()

        if 'FIXRADIUS' not in PAR:
            setattr(PAR, 'FIXRADIUS', 7.5)

        if 'LAMBDA' not in PAR:
            setattr(PAR, 'LAMBDA', 0.)

    def write_gradient(self, path):
        """ Writes the gradient via the base class, then regularizes it and
          saves the result with backup='noregularize'
        """
        super(regularize, self).write_gradient(path)

        g = self.regularize(path)
        self.save(path, g, backup='noregularize')

    def process_kernels(self, path, parameters):
        """ Processes kernels in accordance with parameter settings
        """
        fullpath = path + '/' + 'kernels'
        assert exists(path)

        # keep the uncorrected sum around under a different name
        if exists(fullpath + '/' + 'sum'):
            unix.mv(fullpath + '/' + 'sum', fullpath + '/' + 'sum_nofix')

        # mask sources and receivers
        system.run('postprocess', 'fix_near_field',
                   hosts='all',
                   path=fullpath)

        system.run('solver', 'combine',
                   hosts='head',
                   path=fullpath,
                   parameters=parameters)

    def fix_near_field(self, path=''):
        """ Tapers the kernels of the current source around the source and
          around every receiver, then writes them back in place

          Does nothing if PAR.FIXRADIUS is zero/empty. Only the first
          processor slice (index 0) of each kernel is modified.
        """
        import preprocess
        preprocess.setup()

        name = solver.check_source_names()[solver.getnode]
        fullpath = path + '/' + name
        g = solver.load(fullpath, suffix='_kernel')
        if not PAR.FIXRADIUS:
            return

        x, z = self.getxz()

        # estimate an average grid spacing from the extent of the model and
        # the number of points
        lx = x.max() - x.min()
        lz = z.max() - z.min()
        nn = x.size
        nx = np.around(np.sqrt(nn * lx / lz))
        nz = np.around(np.sqrt(nn * lz / lx))
        dx = lx / nx
        dz = lz / nz

        sigma = 0.5 * PAR.FIXRADIUS * (dx + dz)
        _, h = preprocess.load(solver.getpath + '/' + 'traces/obs')

        # mask sources
        # (the header's "y" fields are used as the second coordinate here)
        self._apply_mask(
            g, self._gaussian_mask(x, z, h.sx[0], h.sy[0], sigma))

        # mask receivers
        for ir in range(h.nr):
            self._apply_mask(
                g, self._gaussian_mask(x, z, h.rx[ir], h.ry[ir], sigma))

        solver.save(fullpath, g, suffix='_kernel')

    @staticmethod
    def _gaussian_mask(x, z, x0, z0, sigma):
        """ Returns a Gaussian of width sigma centered on (x0, z0)
        """
        return np.exp(-0.5 * ((x - x0)**2. + (z - z0)**2.) / sigma**2.)

    @staticmethod
    def _apply_mask(g, mask):
        """ Replaces each kernel, where the mask is large, by its
          mask-weighted average (in place)
        """
        for key in solver.parameters:
            weight = np.sum(mask * g[key][0]) / np.sum(mask)
            g[key][0] *= 1. - mask
            g[key][0] += mask * weight

    def regularize(self, path):
        """ Loads the gradient under path, adds PAR.LAMBDA times the
          subclass-provided "nabla" term, and returns the merged result

          If PAR.LAMBDA is zero/empty, the gradient is returned unchanged.
        """
        assert (exists(path))

        g = solver.load(path + '/' + 'gradient', suffix='_kernel')
        if not PAR.LAMBDA:
            return solver.merge(g)

        m = solver.load(path + '/' + 'model')
        mesh = self.getmesh()

        for key in solver.parameters:
            for iproc in range(PAR.NPROC):
                g[key][iproc] += PAR.LAMBDA * \
                    self.nabla(mesh, m[key][iproc], g[key][iproc])

        return solver.merge(g)

    def nabla(self, mesh, m, g):
        """ Regularization term; must be supplied by subclasses
        """
        raise NotImplementedError("Must be implemented by subclass.")

    def getmesh(self):
        """ Returns the stacked (x, z) coordinates of the initial model
        """
        x, z = self._load_xz()
        return stack(x, z)

    def getxz(self):
        """ Returns the x and z coordinates of the initial model
        """
        return self._load_xz()

    @staticmethod
    def _load_xz():
        """ Reads x and z for processor 0 from PATH.OUTPUT/model_init,
          falling back on "loadbin" if "solver.load" fails
        """
        model_path = PATH.OUTPUT + '/' + 'model_init'
        try:
            m = solver.load(model_path)
            x = m['x'][0]
            z = m['z'][0]
        except Exception:
            # narrowed from a bare "except" so that KeyboardInterrupt and
            # SystemExit are no longer swallowed
            from seisflows.seistools.io import loadbin
            x = loadbin(model_path, 0, 'x')
            z = loadbin(model_path, 0, 'z')
        return x, z
class pbs_sm(custom_import('system', 'mpi')):
    """ An interface through which to submit workflows, run tasks in serial or
      parallel, and perform other system functions.

      By hiding environment details behind a python interface layer, these
      classes provide a consistent command set across different computing
      environments.

      Intermediate files are written to a global scratch path PATH.SCRATCH,
      which must be accessible to all compute nodes.

      Optionally, users can provide a local scratch path PATH.LOCAL if each
      compute node has its own local filesystem.

      For important additional information, please see
      http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration
    """

    def check(self):
        """ Checks parameters and paths
        """
        super(pbs_sm, self).check()

        # check parameters
        if 'WALLTIME' not in PAR:
            setattr(PAR, 'WALLTIME', 30.)

        if 'MEMORY' not in PAR:
            setattr(PAR, 'MEMORY', 0)

        if 'NODESIZE' not in PAR:
            raise ParameterError(PAR, 'NODESIZE')

        if 'PBSARGS' not in PAR:
            setattr(PAR, 'PBSARGS', '')

    def submit(self, workflow):
        """ Submits job

          Builds a PBS resource list from PAR.WALLTIME (split into hours and
          minutes by dividing by 60), PAR.MEMORY, PAR.NTASK and PAR.NODESIZE,
          then hands the submit wrapper to qsub. The "workflow" argument is
          not used here.
        """
        unix.mkdir(PATH.OUTPUT)
        unix.cd(PATH.OUTPUT)

        # save current state
        self.checkpoint()

        # construct resource list
        resources = []
        nodes = int(PAR.NTASK / PAR.NODESIZE)
        cores = PAR.NTASK % PAR.NODESIZE
        hours = int(PAR.WALLTIME / 60)
        minutes = PAR.WALLTIME % 60

        if PAR.WALLTIME:
            resources += ['walltime=%02d:%02d:00' % (hours, minutes)]

        if PAR.MEMORY:
            resources += ['mem=%dgb' % PAR.MEMORY]

        # full nodes plus, if needed, one partially filled node
        if nodes == 0:
            resources += ['nodes=1:ppn=%d' % (cores)]
        elif cores == 0:
            resources += ['nodes=%d:ppn=%d' % (nodes, PAR.NODESIZE)]
        else:
            resources += ['nodes=%d:ppn=%d+1:ppn=%d'
                          % (nodes, PAR.NODESIZE, cores)]

        # construct arguments list
        # (lists have no "join" method; the separator string does the joining)
        call('qsub '
             + '%s ' % PAR.PBSARGS
             + '-N %s ' % PAR.TITLE
             + '-o %s ' % (PATH.SUBMIT + '/' + 'output.log')
             + '-l %s ' % ','.join(resources)
             + '-j %s ' % 'oe'
             + findpath('seisflows.system') + '/' + 'wrappers/submit '
             + '-F %s ' % PATH.OUTPUT)
class specfem2d(custom_import('solver', 'base')):
    """ Python interface for SPECFEM2D

      See base class for method descriptions
    """

    if PAR.MATERIALS == 'LegacyAcoustic':
        parameters = []
        parameters += ['vs']

    def check(self):
        """ Checks parameters and paths
        """
        super(specfem2d, self).check()

        if 'WITH_MPI' not in PAR:
            setattr(PAR, 'WITH_MPI', False)

        # check time stepping parameters
        if 'NT' not in PAR:
            raise Exception("PAR.NT must be specified")

        if 'DT' not in PAR:
            raise Exception("PAR.DT must be specified")

        if 'F0' not in PAR:
            raise Exception("PAR.F0 must be specified")

        # check data format
        if 'FORMAT' not in PAR:
            raise Exception("PAR.FORMAT must be specified")

        if PAR.FORMAT != 'su':
            raise Exception("PAR.FORMAT must be 'su'")

    def check_solver_parameter_files(self):
        """ Checks solver parameters

          Values in the solver parameter files that disagree with PAR are
          overwritten with the PAR values; node 0 prints a warning.
        """
        nt = getpar('nt', cast=int)
        dt = getpar('deltat', cast=float)
        f0 = getpar('f0', file='DATA/SOURCE', cast=float)

        if nt != PAR.NT:
            if self.getnode == 0:
                print("WARNING: nt != PAR.NT")
            setpar('nt', PAR.NT)

        if dt != PAR.DT:
            if self.getnode == 0:
                print("WARNING: dt != PAR.DT")
            setpar('deltat', PAR.DT)

        if f0 != PAR.F0:
            if self.getnode == 0:
                print("WARNING: f0 != PAR.F0")
            setpar('f0', PAR.F0, file='DATA/SOURCE')

        # a mismatch in the number of processors is only reported
        if self.mesh_properties.nproc != PAR.NPROC:
            if self.getnode == 0:
                print('Warning: mesh_properties.nproc != PAR.NPROC')

        if 'MULTIPLES' in PAR:
            if PAR.MULTIPLES:
                setpar('absorbtop', '.false.')
            else:
                setpar('absorbtop', '.true.')

    def generate_data(self, **model_kwargs):
        """ Generates data
        """
        self.generate_mesh(**model_kwargs)

        unix.cd(self.getpath)
        setpar('SIMULATION_TYPE', '1')
        setpar('SAVE_FORWARD', '.false.')
        self._run_mesher_and_solver()

        if PAR.FORMAT in ['SU', 'su']:
            src = glob('OUTPUT_FILES/*.su')
            dst = 'traces/obs'
            unix.mv(src, dst)

    def initialize_adjoint_traces(self):
        """ Sets up adjoint traces via the base class, then adapts file names
          to what SPECFEM2D expects
        """
        super(specfem2d, self).initialize_adjoint_traces()

        # work around SPECFEM2D's use of different name conventions for
        # regular traces and 'adjoint' traces
        if PAR.FORMAT in ['SU', 'su']:
            files = glob('traces/adj/*.su')
            unix.rename('.su', '.su.adj', files)

        # work around SPECFEM2D's requirement that all components exist,
        # even ones not in use
        if PAR.FORMAT in ['SU', 'su']:
            unix.cd(self.getpath + '/' + 'traces/adj')
            # the first channel in use serves as a template for missing ones
            src = 'U%s_file_single.su.adj' % PAR.CHANNELS[0]
            for channel in ['x', 'y', 'z', 'p']:
                dst = 'U%s_file_single.su.adj' % channel
                if not exists(dst):
                    unix.cp(src, dst)

    def generate_mesh(self, model_path=None, model_name=None,
                      model_type='gll'):
        """ Performs meshing and database generation
        """
        assert (model_name)
        assert (model_type)

        self.initialize_solver_directories()
        unix.cd(self.getpath)

        assert (exists(model_path))
        self.check_mesh_properties(model_path)

        src = glob(join(model_path, '*'))
        dst = join(self.getpath, 'DATA')
        unix.cp(src, dst)

        self.export_model(PATH.OUTPUT + '/' + model_name)

    ### low-level solver interface

    def _run_mesher_and_solver(self):
        """ Runs xmeshfem2D followed by xspecfem2D, with or without MPI
          depending on PAR.WITH_MPI
        """
        if PAR.WITH_MPI:
            call_solver(system.mpiexec(), 'bin/xmeshfem2D')
            call_solver(system.mpiexec(), 'bin/xspecfem2D')
        else:
            call_solver_nompi('bin/xmeshfem2D')
            call_solver_nompi('bin/xspecfem2D')

    def forward(self, path='traces/syn'):
        """ Calls SPECFEM2D forward solver
        """
        setpar('SIMULATION_TYPE', '1')
        setpar('SAVE_FORWARD', '.true.')
        self._run_mesher_and_solver()

        if PAR.FORMAT in ['SU', 'su']:
            filenames = glob('OUTPUT_FILES/*.su')
            unix.mv(filenames, path)

    def adjoint(self):
        """ Calls SPECFEM2D adjoint solver
        """
        setpar('SIMULATION_TYPE', '3')
        setpar('SAVE_FORWARD', '.false.')

        unix.rm('SEM')
        unix.ln('traces/adj', 'SEM')

        # hack to deal with SPECFEM2D's use of different name conventions for
        # regular traces and 'adjoint' traces
        if PAR.FORMAT in ['SU', 'su']:
            files = glob('traces/adj/*.su')
            unix.rename('.su', '.su.adj', files)

        self._run_mesher_and_solver()

    ### postprocessing utilities

    def smooth(self, path='', parameters=None, span=0.):
        """ Smooths SPECFEM2D kernels by convolving them with a Gaussian

          Returns the unsmoothed kernels if span is zero/empty. Otherwise the
          original directory is moved to path + '_nosmooth' and the smoothed
          kernels are saved under path. Only the first processor slice
          (index 0) is read and smoothed.
        """
        from seisflows.tools.array import meshsmooth, stack

        kernels = self.load(path, suffix='_kernel')
        if not span:
            return kernels

        # set up grid
        x = sem.read(PATH.MODEL_INIT, 'x', 0)
        z = sem.read(PATH.MODEL_INIT, 'z', 0)
        mesh = stack(x, z)

        for key in parameters or self.parameters:
            kernels[key] = [meshsmooth(kernels[key][0], mesh, span)]

        unix.rm(path + '_nosmooth')
        unix.mv(path, path + '_nosmooth')
        self.save(path, kernels, suffix='_kernel')

    ### file transfer utilities

    def import_model(self, path):
        """ Copies the files under path/model into the solver DATA directory
        """
        src = glob(path + '/' + 'model/*')
        dst = join(self.getpath, 'DATA/')
        unix.cp(src, dst)

    def export_model(self, path):
        """ Copies the solver's DATA/*.bin files to path (node 0 only)
        """
        if self.getnode == 0:
            unix.mkdir(path)
            src = glob(join(self.getpath, 'DATA/*.bin'))
            dst = path
            unix.cp(src, dst)

    @property
    def data_filenames(self):
        """ Names of the data files, derived from PAR.CHANNELS if set,
          otherwise found by globbing traces/obs

          Evaluates to None if PAR.FORMAT is not 'SU'/'su'.
        """
        if PAR.CHANNELS:
            if PAR.FORMAT in ['SU', 'su']:
                return ['U%s_file_single.su' % channel
                        for channel in PAR.CHANNELS]
        else:
            unix.cd(self.getpath)
            unix.cd('traces/obs')

            if PAR.FORMAT in ['SU', 'su']:
                return glob('U?_file_single.su')

    @property
    def model_databases(self):
        return join(self.getpath, 'DATA')

    @property
    def kernel_databases(self):
        return join(self.getpath, 'OUTPUT_FILES')

    @property
    def source_prefix(self):
        return 'SOURCE'
class specfem3d_globe(custom_import('solver', 'base')):
    """ Python interface for SPECFEM3D_GLOBE

      See base class for method descriptions
    """

    if PAR.MATERIALS in ['Isotropic']:
        parameters = []
        parameters += ['vp']
        parameters += ['vs']
    else:
        parameters = []
        parameters += ['vpv']
        parameters += ['vph']
        parameters += ['vsv']
        parameters += ['vsh']
        parameters += ['eta']

    def check(self):
        """ Checks parameters and paths
        """
        super(specfem3d_globe, self).check()

        if 'CHANNELS' not in PAR:
            setattr(PAR, 'CHANNELS', 'ENZ')

        # check data format
        if 'FORMAT' not in PAR:
            raise Exception()

    def generate_data(self, **model_kwargs):
        """ Generates data
        """
        self.generate_mesh(**model_kwargs)

        unix.cd(self.getpath)
        setpar('SIMULATION_TYPE', '1')
        setpar('SAVE_FORWARD', '.true.')
        call_solver(system.mpiexec(), 'bin/xspecfem3D')

        if PAR.FORMAT in ['ASCII', 'ascii']:
            src = glob('OUTPUT_FILES/*.sem.ascii')
            dst = 'traces/obs'
            unix.mv(src, dst)

    def generate_mesh(self, model_path=None, model_name=None,
                      model_type='gll'):
        """ Performs meshing and database generation

          Only model_type='gll' is supported; anything else raises
          NotImplementedError.
        """
        assert (model_name)
        assert (model_type)

        self.initialize_solver_directories()
        unix.cd(self.getpath)

        if model_type == 'gll':
            assert (exists(model_path))
            self.check_mesh_properties(model_path)

            unix.cp(glob(model_path + '/' + '*'), self.model_databases)

            call_solver(system.mpiexec(), 'bin/xmeshfem3D')
            self.export_model(PATH.OUTPUT + '/' + model_name)

        else:
            raise NotImplementedError

    ### model input/output

    def load(self, path, prefix='reg1_', suffix='', verbose=False):
        """ reads SPECFEM model or kernel

          Models are stored in Fortran binary format and separated into
          multiple files according to material parameter and processor rank.
        """
        model = Model(self.parameters)
        minmax = Minmax(self.parameters)

        for iproc in range(self.mesh_properties.nproc):
            # read database files
            keys, vals = loadbypar(path, self.parameters, iproc,
                                   prefix, suffix)
            for key, val in zip(keys, vals):
                model[key] += [val]

            minmax.update(keys, vals)

        if verbose:
            minmax.write(path, logpath=PATH.SUBMIT)

        return model

    def save(self, path, model, prefix='reg1_', suffix=''):
        """ writes SPECFEM3D_GLOBE transversely isotropic model

          Parameters listed in self.parameters are written from "model";
          the remaining ones are copied from PATH.OUTPUT/model_init, unless
          kernels are being saved ('kernel' in suffix), in which case they
          are skipped.
        """
        # NOTE(review): only the transversely isotropic keys and 'rho' are
        # handled here; with the 'Isotropic' parameter set ('vp', 'vs')
        # nothing from "model" would be written -- confirm this is intended.
        unix.mkdir(path)

        for iproc in range(self.mesh_properties.nproc):
            for key in ['vpv', 'vph', 'vsv', 'vsh', 'eta']:
                if key in self.parameters:
                    savebin(model[key][iproc], path, iproc,
                            prefix + key + suffix)
                elif 'kernel' in suffix:
                    pass
                else:
                    src = PATH.OUTPUT + '/' + 'model_init'
                    dst = path
                    copybin(src, dst, iproc, prefix + key + suffix)

            if 'rho' in self.parameters:
                savebin(model['rho'][iproc], path, iproc,
                        prefix + 'rho' + suffix)
            elif 'kernel' in suffix:
                pass
            else:
                src = PATH.OUTPUT + '/' + 'model_init'
                dst = path
                copybin(src, dst, iproc, prefix + 'rho' + suffix)

    ### low-level solver interface

    def forward(self, path='traces/syn'):
        """ Calls SPECFEM3D_GLOBE forward solver
        """
        solvertools.setpar('SIMULATION_TYPE', '1')
        solvertools.setpar('SAVE_FORWARD', '.true.')
        call_solver(system.mpiexec(), 'bin/xspecfem3D')

        if PAR.FORMAT in ['ASCII', 'ascii']:
            src = glob('OUTPUT_FILES/*.sem.ascii')
            dst = path
            unix.mv(src, dst)

    def adjoint(self):
        """ Calls SPECFEM3D_GLOBE adjoint solver
        """
        solvertools.setpar('SIMULATION_TYPE', '3')
        solvertools.setpar('SAVE_FORWARD', '.false.')
        unix.rm('SEM')
        unix.ln('traces/adj', 'SEM')
        call_solver(system.mpiexec(), 'bin/xspecfem3D')

    def check_mesh_properties(self, path=None, parameters=None):
        """ Counts processor slices and points per slice by reading the
          binary files of the first parameter; the result is cached on the
          instance and returned
        """
        if not hasattr(self, '_mesh_properties'):
            if not path:
                path = PATH.MODEL_INIT

            if not parameters:
                parameters = self.parameters

            nproc = 0
            ngll = []
            # keep reading slices until the next file does not exist
            while True:
                dummy = loadbin(path, nproc, 'reg1_' + parameters[0])
                ngll += [len(dummy)]
                nproc += 1
                if not exists('%s/proc%06d_reg1_%s.bin'
                              % (path, nproc, parameters[0])):
                    break

            self._mesh_properties = Struct([
                ['nproc', nproc],
                ['ngll', ngll]])

        return self._mesh_properties

    def rename_data(self):
        """ Works around conflicting data filename conventions
        """
        files = glob(self.getpath + '/' + 'traces/adj/*sem.ascii')
        unix.rename('sem.ascii', 'sem.ascii.adj', files)

    def initialize_adjoint_traces(self):
        """ Sets up adjoint traces via the base class, then renames them
        """
        super(specfem3d_globe, self).initialize_adjoint_traces()

        # workaround for the solver's use of different name conventions for
        # regular traces and 'adjoint' traces
        # NOTE(review): the replacement here is 'adj' while rename_data uses
        # 'sem.ascii.adj' -- confirm which convention the solver expects.
        if PAR.FORMAT in ['ASCII', 'ascii']:
            files = glob(self.getpath + '/' + 'traces/adj/*sem.ascii')
            unix.rename('sem.ascii', 'adj', files)

    @property
    def data_filenames(self):
        """ File names found in traces/obs for each channel in PAR.CHANNELS

          The list of names is returned wrapped in an outer list. Evaluates
          to None if PAR.FORMAT is not 'ASCII'/'ascii'.
        """
        unix.cd(self.getpath)
        unix.cd('traces/obs')

        if PAR.FORMAT in ['ASCII', 'ascii']:
            filenames = []
            for channel in PAR.CHANNELS:
                filenames += glob('*.??%s.sem.ascii' % channel)
            return [filenames]

    @property
    def kernel_databases(self):
        return join(self.getpath, 'OUTPUT_FILES/DATABASES_MPI')

    @property
    def model_databases(self):
        return join(self.getpath, 'OUTPUT_FILES/DATABASES_MPI')

    @property
    def source_prefix(self):
        return 'CMTSOLUTION'
class double_difference(custom_import('preprocess', 'legacy')):
    """ Data preprocessing class
    """

    def check(self):
        """ Checks parameters, paths, and dependencies
        """
        # the class is named "double_difference"; super() must refer to it
        super(double_difference, self).check()

    def prepare_eval_grad(self, path='.'):
        """ Prepares solver for gradient evaluation by writing residuals and
          adjoint traces
        """
        unix.cd(path)

        d, h = self.load(prefix='traces/obs/')
        s, _ = self.load(prefix='traces/syn/')

        d = self.apply(self.process_traces, [d], [h])
        s = self.apply(self.process_traces, [s], [h])

        r = self.apply(self.write_residuals, [s, d], [h], inplace=False)

        s = self.apply(self.generate_adjoint_traces, [s, d, r], [h])
        self.save(s, h, prefix='traces/adj/')

    def write_residuals(self, s, d, h):
        """ Computes residuals from observations and synthetics

          For every receiver pair (ir, jr) the entry r[ir, jr] is the
          difference between misfit.wtime evaluated on the synthetic pair
          and on the observed pair. The matrix is antisymmetric with a zero
          diagonal. The per-receiver root-sum-square of the entries is
          written to the file 'residuals', and the full nr-by-nr matrix is
          returned.
        """
        nr = h.nr
        nt = h.nt
        dt = h.dt

        r = np.zeros((nr, nr))

        for ir in range(nr):
            for jr in range(ir + 1, nr):
                r[ir, jr] = (misfit.wtime(s[:, ir], s[:, jr], nt, dt) -
                             misfit.wtime(d[:, ir], d[:, jr], nt, dt))
                # mirror with opposite sign; previously the lower triangle
                # was assigned minus itself and therefore stayed zero
                r[jr, ir] = -r[ir, jr]

        # write residuals
        np.savetxt('residuals', np.sqrt(np.sum(r * r, 0)))

        return np.array(r)

    def generate_adjoint_traces(self, s, d, r, h):
        """ Computes adjoint traces from observed and synthetic traces

          Each synthetic trace is replaced in place by its centered
          finite-difference time derivative (end samples set to zero),
          scaled by the summed residuals of that receiver divided by the
          energy of the original trace. "d" is not used.
        """
        nr = h.nr
        dt = h.dt

        for ir in range(nr):
            # energy of the trace before it is overwritten below
            nrm = np.sum((s[:, ir] * s[:, ir]) * dt)
            fit = np.sum(r[ir, :])
            s[1:-1, ir] = (s[2:, ir] - s[0:-2, ir]) / (2. * dt)
            s[0, ir] = 0.
            s[-1, ir] = 0.
            s[:, ir] *= fit / nrm

        return s
class pbs_torque_sm(custom_import('system', 'base')):
    """ An interface through which to submit workflows, run tasks in serial or
      parallel, and perform other system functions.

      By hiding environment details behind a python interface layer, these
      classes provide a consistent command set across different computing
      environments.

      Intermediate files are written to a global scratch path PATH.SCRATCH,
      which must be accessible to all compute nodes.

      Optionally, users can provide a local scratch path PATH.LOCAL if each
      compute node has its own local filesystem.

      For important additional information, please see
      http://seisflows.readthedocs.org/en/latest/manual/manual.html#system-configuration
    """

    def check(self):
        """ Checks parameters and paths

          Missing optional entries receive defaults; a missing MEMORY, NTASK,
          NPROC or NODESIZE raises ParameterError.
        """
        # (name, default factory) in the order they are checked; a factory
        # of None marks a required parameter
        par_spec = [
            ('TITLE', lambda: basename(abspath('.'))),
            ('WALLTIME', lambda: 30.),
            ('MEMORY', None),
            ('VERBOSE', lambda: 1),
            ('NTASK', None),
            ('NPROC', None),
            ('NODESIZE', None),
            ('PBSARGS', lambda: ''),
        ]
        for name, factory in par_spec:
            if name in PAR:
                continue
            if factory is None:
                raise ParameterError(PAR, name)
            setattr(PAR, name, factory())

        # later path defaults build on earlier ones, hence the lazy factories
        path_spec = [
            ('SCRATCH', lambda: join(abspath('.'), 'scratch')),
            ('LOCAL', lambda: None),
            ('SYSTEM', lambda: join(PATH.SCRATCH, 'system')),
            ('SUBMIT', lambda: abspath('.')),
            ('OUTPUT', lambda: join(PATH.SUBMIT, 'output')),
        ]
        for name, factory in path_spec:
            if name not in PATH:
                setattr(PATH, name, factory())

    def submit(self, workflow):
        """ Submits job

          Assembles the PBS resource string (walltime, memory, node layout)
          and passes the submit wrapper to qsub. "workflow" is not used here.
        """
        unix.mkdir(PATH.OUTPUT)
        unix.cd(PATH.OUTPUT)

        # save current state
        self.checkpoint()

        # construct resource list
        nodes = int(PAR.NTASK / PAR.NODESIZE)
        cores = PAR.NTASK % PAR.NODESIZE
        hours = int(PAR.WALLTIME / 60)
        minutes = PAR.WALLTIME % 60

        pieces = ['walltime=%02d:%02d:00' % (hours, minutes)]
        if nodes == 0:
            pieces.append('mem=%dgb' % PAR.MEMORY)
            pieces.append('nodes=1:ppn=%d' % cores)
        elif cores == 0:
            pieces.append('mem=%dgb' % PAR.MEMORY)
            pieces.append('nodes=%d:ppn=%d' % (nodes, PAR.NODESIZE))
        else:
            pieces.append('mem=%dgb' % PAR.MEMORY)
            pieces.append('nodes=%d:ppn=%d+1:ppn=%d'
                          % (nodes, PAR.NODESIZE, cores))
        resources = ','.join(pieces)

        # construct arguments list
        command = 'qsub '
        command += '%s ' % PAR.PBSARGS
        command += '-N %s ' % PAR.TITLE
        command += '-o %s ' % (PATH.SUBMIT + '/' + 'output.log')
        command += '-l %s ' % resources
        command += '-j %s ' % 'oe'
        command += findpath('seisflows.system') + '/' + 'wrappers/submit '
        command += '-F %s ' % PATH.OUTPUT
        call(command)

    def run(self, classname, funcname, hosts='all', **kwargs):
        """ Runs tasks in serial or parallel on specified hosts

          hosts='all' uses the run_pbsdsh wrapper and hosts='head' the
          run_pbsdsh_head wrapper; any other value launches nothing.
        """
        self.checkpoint()
        self.save_kwargs(classname, funcname, kwargs)

        if hosts == 'all':
            # run on all available nodes
            wrapper = 'wrappers/run_pbsdsh '
        elif hosts == 'head':
            # run on head node
            wrapper = 'wrappers/run_pbsdsh_head '
        else:
            return

        command = 'pbsdsh '
        command += join(findpath('seisflows.system'),
                        'wrappers/export_paths.sh ')
        command += os.getenv('PATH')
        command += ' '
        command += os.getenv('LD_LIBRARY_PATH')
        command += ' '
        command += join(findpath('seisflows.system'), wrapper)
        command += PATH.OUTPUT
        command += ' '
        command += classname
        command += ' '
        command += funcname
        command += ' '
        command += dirname(findpath('seisflows'))
        call(command)

    def getnode(self):
        """ Gets number of running task, read from PBS_VNODENUM
        """
        vnode = os.getenv('PBS_VNODENUM')
        return int(vnode)

    def mpiexec(self):
        """ Specifies MPI executable; used to invoke solver
        """
        # call solver as MPI singleton when using pbsdsh
        return ''

    def save_kwargs(self, classname, funcname, kwargs):
        """ Saves kwargs to
          PATH.OUTPUT/SeisflowsObjects/<classname>_kwargs/<funcname>.p
        """
        kwargspath = join(PATH.OUTPUT, 'SeisflowsObjects',
                          classname + '_kwargs')
        unix.mkdir(kwargspath)
        saveobj(join(kwargspath, funcname + '.p'), kwargs)