def get_acceptance_swap(self, beta, beta_tune_interval):
    """
    Returns acceptance rate for swapping states between chains.
    """
    logger.info(
        'Counting accepted swaps of'
        ' posterior chains with beta == %f', beta)
    worker_idxs = self.get_workers_ge_beta(beta, idxs=True)
    logger.info('Worker indexes with beta >= %f: %s' % (
        beta, list2string(worker_idxs)))

    tempered_worker_idxs = deepcopy(worker_idxs)
    for worker_idx in self.get_posterior_workers(idxs=True):
        tempered_worker_idxs.remove(worker_idx)

    logger.info('Workers of tempered chains: %s' %
                list2string(tempered_worker_idxs))

    rowidxs, colidxs = num.meshgrid(worker_idxs, tempered_worker_idxs)

    n_samples = int(
        self.sample_count[rowidxs, colidxs].sum() +
        self.sample_count[colidxs, rowidxs].sum())
    accepted_samples = int(
        self.acceptance_matrix[rowidxs, colidxs].sum() +
        self.acceptance_matrix[colidxs, rowidxs].sum())
    logger.info(
        'Number of samples %i and accepted samples %i of acceptance'
        ' evaluation, respectively.' % (n_samples, accepted_samples))

    if n_samples:
        return float(accepted_samples) / float(n_samples)
    else:
        # no swaps proposed yet
        return 0.
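
# Usage sketch of the swap bookkeeping above (self-contained, with
# hypothetical worker indexes): `sample_count[i, j]` counts swaps proposed
# between workers i and j, `acceptance_matrix[i, j]` the accepted ones.
# Both index orders are summed, since a swap may be recorded as (i, j)
# or (j, i).
import numpy as np

sample_count = np.zeros((4, 4), dtype=int)
acceptance_matrix = np.zeros((4, 4), dtype=int)
sample_count[0, 2] = 10
acceptance_matrix[0, 2] = 4

worker_idxs = [0, 1, 2, 3]    # hypothetical: all workers with beta >= given beta
tempered_idxs = [2, 3]        # hypothetical: workers of tempered chains

rows, cols = np.meshgrid(worker_idxs, tempered_idxs)
n_samples = int(sample_count[rows, cols].sum() +
                sample_count[cols, rows].sum())
accepted = int(acceptance_matrix[rows, cols].sum() +
               acceptance_matrix[cols, rows].sum())
rate = float(accepted) / float(n_samples) if n_samples else 0.   # -> 0.4
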
def init_hierarchicals(self, problem_config):
    """
    Initialize hierarchical parameters.
    Ramp estimation in azimuth and range direction of a radar scene.
    """
    hierarchicals = problem_config.hierarchicals
    if self.config.fit_plane:
        logger.info('Estimating ramp for each dataset...')
        for data in self.datasets:
            if isinstance(data, heart.DiffIFG):
                for hierarchical_name in data.plane_names():
                    hierarchical_keys = utility.list2string(
                        hierarchicals.keys())
                    if not self.config.fit_plane and \
                            hierarchical_name in hierarchicals:
                        raise ConfigInconsistentError(
                            'Plane removal disabled, but planes are'
                            ' defined in the problem configuration'
                            ' (hierarchicals)!\n'
                            ' Got: %s' % hierarchical_keys)

                    if self.config.fit_plane and \
                            hierarchical_name not in hierarchicals:
                        raise ConfigInconsistentError(
                            'Plane corrections enabled, but they are'
                            ' not defined in the problem configuration'
                            ' (hierarchicals)! Looking for: %s\n'
                            ' Got: %s' % (
                                hierarchical_name, hierarchical_keys))

                    param = hierarchicals[hierarchical_name]
                    if not num.array_equal(param.lower, param.upper):
                        kwargs = dict(
                            name=param.name,
                            shape=param.dimension,
                            lower=param.lower,
                            upper=param.upper,
                            testval=param.testvalue,
                            transform=None,
                            dtype=tconfig.floatX)
                        try:
                            self.hierarchicals[
                                hierarchical_name] = Uniform(**kwargs)
                        except TypeError:
                            kwargs.pop('name')
                            self.hierarchicals[hierarchical_name] = \
                                Uniform.dist(**kwargs)
                    else:
                        logger.info(
                            'not solving for %s, got fixed at %s' % (
                                param.name,
                                utility.list2string(
                                    param.lower.flatten())))
                        self.hierarchicals[hierarchical_name] = param.lower
            else:
                logger.info('No plane for GNSS data.')

    logger.info('Initialized %i hierarchical parameters'
                ' (ramps).' % len(self.hierarchicals))
def do_variance_estimate(self, wmap):
    filterer = wmap.config.filterer
    scalings = []
    for i, (tr, target) in enumerate(zip(wmap.datasets, wmap.targets)):
        wavename = None   # None uses first tabulated phase
        arrival_time = heart.get_phase_arrival_time(
            engine=self.engine,
            source=self.events[wmap.config.event_idx],
            target=target,
            wavename=wavename)

        if arrival_time < tr.tmin:
            logger.warning(
                'no data for variance estimation on pre-P arrival'
                ' in wavemap %s, for trace %s!' % (
                    wmap._mapid, list2string(tr.nslc_id)))
            logger.info(
                'Using reference arrival "%s" instead!' % wmap.name)
            arrival_time = heart.get_phase_arrival_time(
                engine=self.engine,
                source=self.events[wmap.config.event_idx],
                target=target,
                wavename=wmap.name)

        ctrace = tr.copy()   # was undefined if no filterer configured
        if filterer:
            # apply all the filters
            for filt in filterer:
                filt.apply(ctrace)

        ctrace = ctrace.chop(
            tmin=tr.tmin,
            tmax=arrival_time - self.pre_arrival_time)

        nslc_id_str = list2string(ctrace.nslc_id)
        data = ctrace.get_ydata()
        if data.size == 0:
            raise ValueError(
                'Trace %s contains no pre-P arrival data! Please either'
                ' remove/blacklist or make sure data contains times'
                ' before the P arrival time!' % nslc_id_str)

        scaling = num.nanvar(data)
        if num.isfinite(scaling).all():
            logger.debug(
                'Variance estimate of %s = %g' % (nslc_id_str, scaling))
            scalings.append(scaling)
        else:
            raise ValueError(
                'Pre P-trace of %s contains Inf or'
                ' NaN!' % nslc_id_str)

    return scalings
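
# A minimal sketch of the noise scaling computed above, assuming plain
# numpy: the variance of the pre-arrival (noise-only) window is used to
# scale the data covariance; `nanvar` ignores gaps encoded as NaN.
import numpy as np

rng = np.random.default_rng(0)
noise_window = rng.normal(scale=0.2, size=500)   # stand-in pre-P samples
scaling = np.nanvar(noise_window)                # ~ 0.04 for sigma = 0.2
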
def get_backend(backend):
    available_backends = backends.keys()
    if backend not in available_backends:
        raise NotImplementedError(
            'Backend not supported! Options: %s' %
            list2string(available_backends))

    return backends[backend]
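
# Usage sketch (assuming 'csv' and 'bin' are registered keys of the
# module-level `backends` mapping; the names are illustrative only):
# backend_cls = get_backend('csv')   # returns the registered backend class
# get_backend('hdf5')                # raises NotImplementedError, listing options
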
def run_mpi_sampler(
        sampler_name, model, sampler_args, keep_tmp, n_jobs,
        loglevel='info'):
    """
    Execute a sampling algorithm that requires a call to mpiexec,
    as it uses MPI for parallelization.

    A run directory is created under '/tmp/' where the sampler arguments
    are pickled and then reloaded by the MPI sampler script.

    Parameters
    ----------
    sampler_name : string
        one of the keys of the ``samplers`` registry
    model : :class:`pymc3.model.Model`
        that holds the forward model graph
    sampler_args : list
        of sampler arguments, order is important
    keep_tmp : boolean
        if True, do not remove the run directory after execution
    n_jobs : int
        number of processors to call MPI with
    loglevel : string
        verbosity of logging, e.g. 'info'
    """
    from beat.info import project_root
    from beat.utility import dump_objects

    try:
        sampler = samplers[sampler_name]
    except KeyError:
        raise NotImplementedError(
            'Currently only samplers: %s supported!' %
            list2string(samplers.keys()))

    runner = MPIRunner(keep_tmp=keep_tmp)
    args_path = pjoin(runner.tempdir, mpiargs_name)
    model_path = pjoin(runner.tempdir, pymc_model_name)

    dump_objects(model_path, model)
    dump_objects(args_path, sampler_args)

    samplerdir = pjoin(project_root, sampler)
    logger.info('sampler directory: %s' % samplerdir)

    runner.run(samplerdir, n_jobs=n_jobs, loglevel=loglevel)
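
# Usage sketch (hypothetical values; 'pt' is assumed to be a key of the
# `samplers` registry and `model` a previously built pymc3 model):
# run_mpi_sampler(
#     sampler_name='pt', model=model, sampler_args=[...],
#     keep_tmp=False, n_jobs=4, loglevel='info')
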
def load_obspy_data(datadir):
    """
    Load data from the directory through obspy and convert to pyrocko
    objects.

    Parameters
    ----------
    datadir : string
        absolute path to the data directory

    Returns
    -------
    stations, data_traces
    """
    import obspy
    from pyrocko import obspy_compat

    obspy_compat.plant()

    filenames = set(glob(datadir + '/*'))
    remaining_f = copy.deepcopy(filenames)

    stations = []
    for f in filenames:
        try:
            inv = obspy.read_inventory(f)
            stations.extend(inv.to_pyrocko_stations())
            remaining_f.discard(f)
        except TypeError:
            logger.debug('File %s not an inventory.' % f)

    filenames = copy.deepcopy(remaining_f)
    data_traces = []
    for f in filenames:
        try:
            stream = obspy.read(f)
            pyrocko_traces = stream.to_pyrocko_traces()
            for tr in pyrocko_traces:
                data_traces.append(
                    heart.SeismicDataset.from_pyrocko_trace(tr))
            remaining_f.discard(f)
        except TypeError:
            logger.debug('File %s not waveforms.' % f)

    if len(remaining_f) > 0:
        logger.warning(
            'Could not import these files: %s' %
            utility.list2string(list(remaining_f)))

    logger.info(
        'Imported %i data_traces and %i stations' % (
            len(data_traces), len(stations)))
    return stations, data_traces
def __init__(self, sc, project_dir, events, hypers=False):

    super(SeismicDistributerComposite, self).__init__(
        sc, events, project_dir, hypers=hypers)

    self.gfs = {}
    self.gf_names = {}
    self.choppers = {}
    self.sweep_implementation = 'c'
    self._mode = 'ffi'
    self.gfpath = os.path.join(
        project_dir, self._mode, bconfig.linear_gf_dir_name)

    self.config = sc
    dgc = sc.gf_config.discretization_config

    for pw, pl in zip(dgc.patch_widths, dgc.patch_lengths):
        if pw != pl:
            raise ValueError(
                'So far only square patches supported in kinematic'
                ' model! - fast_sweeping issues')

    if len(sc.gf_config.reference_sources) > 1:
        logger.warning(
            'So far, rupture propagation is only supported on each'
            ' subfault individually.')

    self.fault = self.load_fault_geometry()

    logger.info(
        'Fault(s) discretized to %s [km]'
        ' patches.' % utility.list2string(dgc.patch_lengths))

    if not hypers:
        self.sweepers = []
        for idx in range(self.fault.nsubfaults):
            n_p_dip, n_p_strike = \
                self.fault.ordering.get_subfault_discretization(idx)

            self.sweepers.append(theanof.Sweeper(
                dgc.patch_lengths[idx],
                n_p_dip,
                n_p_strike,
                self.sweep_implementation))

        for wmap in self.wavemaps:
            logger.info(
                'Preparing data of "%s" for optimization' % wmap.name)
            wmap.prepare_data(
                source=self.events[wmap.config.event_idx],
                engine=self.engine,
                outmode='array',
                chop_bounds=['b', 'c'])
def save_covs(wmap, cov_mat='pred_v'):
    """
    Save covariance matrices of given attribute.
    """
    covs = {
        utility.list2string(dataset.nslc_id):
            getattr(dataset.covariance, cov_mat)
        for dataset in wmap.datasets}

    outname = os.path.join(
        results_path, '%s_C_%s_%s' % ('seismic', cov_mat, wmap._mapid))
    logger.info('"%s" to: %s' % (wmap._mapid, outname))
    num.savez(outname, **covs)
def __init__(self, sc, project_dir, event, hypers=False):

    super(SeismicDistributerComposite, self).__init__(
        sc, event, project_dir, hypers=hypers)

    self.gfs = {}
    self.gf_names = {}
    self.choppers = {}
    self.sweep_implementation = 'c'
    self._mode = 'ffi'
    self.gfpath = os.path.join(
        project_dir, self._mode, bconfig.linear_gf_dir_name)

    self.config = sc
    sgfc = sc.gf_config

    for pw, pl in zip(sgfc.patch_widths, sgfc.patch_lengths):
        if pw != pl:
            raise ValueError(
                'So far only square patches supported in kinematic'
                ' model! - fast_sweeping issues')

    if len(sgfc.reference_sources) > 1:
        raise ValueError(
            'So far only one reference plane supported! -'
            ' fast_sweeping issues')

    self.fault = self.load_fault_geometry()

    # TODO: support n_subfaults
    n_p_dip, n_p_strike = self.fault.get_subfault_discretization(0)

    logger.info(
        'Fault(s) discretized to %s [km]'
        ' patches.' % utility.list2string(sgfc.patch_lengths))

    if not hypers:
        self.sweeper = theanof.Sweeper(
            sgfc.patch_lengths[0],
            n_p_dip,
            n_p_strike,
            self.sweep_implementation)

        for wmap in self.wavemaps:
            logger.info(
                'Preparing data of "%s" for optimization' % wmap.name)
            wmap.prepare_data(
                source=self.event,
                engine=self.engine,
                outmode='array')
def add_derived_variables(self, source_type, n_sources=1):

    try:
        varnames = derived_variables_mapping[source_type]
        logger.info(
            'Adding derived variables %s to trace.' %
            list2string(varnames))
    except KeyError:
        logger.info('No derived variables for %s' % source_type)
        varnames = []

    for varname in varnames:
        shape = (n_sources,)
        self.flat_names[varname] = ttab.create_flat_names(varname, shape)
        self.var_shapes[varname] = shape
        self.var_dtypes[varname] = 'float64'
        self.varnames.append(varname)
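
# Sketch of the flat-name bookkeeping above, assuming pymc3's trace-table
# helper: one flat column name is created per source.
from pymc3.backends import tracetab as ttab

ttab.create_flat_names('magnitude', (2,))
# -> ['magnitude__0', 'magnitude__1']
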
def __init__(
        self, structure='identity', pre_arrival_time=5.,
        engine=None, events=None, sources=None, chop_bounds=['b', 'c']):

    avail = available_noise_structures()
    if structure not in avail:
        raise AttributeError(
            'Selected noise structure "%s" not supported! Implemented'
            ' noise structures: %s' % (structure, list2string(avail)))

    self.events = events
    self.engine = engine
    self.sources = sources
    self.pre_arrival_time = pre_arrival_time
    self.structure = structure
    self.chop_bounds = chop_bounds
def get_random_variables(self):
    """
    Evaluate problem setup and return random variables dictionary.
    Has to be executed in a "with model" context!

    Returns
    -------
    rvs : dict
        random variables to be sampled
    fixed_params : dict
        parameters that are fixed in the setup
    """
    pc = self.config.problem_config

    logger.debug('Optimization for %i sources', pc.n_sources)

    rvs = dict()
    fixed_params = dict()
    for param in pc.priors.values():
        if not num.array_equal(param.lower, param.upper):
            shape = bconfig.get_parameter_shape(param, pc)
            kwargs = dict(
                name=param.name,
                shape=shape,
                lower=param.lower,
                upper=param.upper,
                testval=param.testvalue,
                transform=None,
                dtype=tconfig.floatX)
            try:
                rvs[param.name] = Uniform(**kwargs)
            except TypeError:
                kwargs.pop('name')
                rvs[param.name] = Uniform.dist(**kwargs)
        else:
            logger.info(
                'not solving for %s, got fixed at %s' % (
                    param.name,
                    list2string(param.lower.flatten())))
            fixed_params[param.name] = param.lower

    return rvs, fixed_params
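
# A minimal sketch of the prior-construction pattern above, assuming
# pymc3: inside a model context, Uniform(**kwargs) registers a named
# random variable; outside, pymc3 raises TypeError, so Uniform.dist
# builds an unregistered distribution object instead (the same fallback
# the function uses).
import numpy as np
import pymc3 as pm

kwargs = dict(
    name='depth', shape=1,
    lower=np.array([0.]), upper=np.array([10.]),
    testval=np.array([5.]), transform=None)
try:
    rv = pm.Uniform(**kwargs)       # only works inside `with pm.Model():`
except TypeError:
    kwargs.pop('name')
    rv = pm.Uniform.dist(**kwargs)  # standalone distribution object
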
def set_stack_mode(self, mode='numpy'):
    """
    Sets the backend on which the stacking is working.
    Depending on that, the input to the stack function has to be
    either of :class:`numpy.ndarray` or of
    :class:`theano.tensor.Tensor`.

    Parameters
    ----------
    mode : str
        backend on which arrays are stacked
    """
    available_modes = backends.keys()
    if mode not in available_modes:
        raise GFLibraryError(
            'Stacking mode %s not available! Available modes: %s' % (
                mode, list2string(available_modes)))

    self._mode = mode
def get_all_patches(self, datatype=None, component=None):
    """
    Get all RectangularSource patches for the full complex fault.

    Parameters
    ----------
    datatype : str
        'geodetic' or 'seismic'
    component : str
        slip component to return, one of the keys of
        ``slip_directions``
    """
    datatype = self._assign_datatype(datatype)
    component = self._assign_component(component)

    patches = []
    for i in range(self.nsubfaults):
        patches += self.get_subfault_patches(
            i, datatype=datatype, component=component)

    return patches
def update_betas(self, t_scale=None):
    """
    Update annealing schedule for all the workers.

    Parameters
    ----------
    t_scale : float
        factor to adjust the step size in the temperatures;
        the base step size is 1.e1; if None, the current scale
        factor is reused

    The resulting inverse temperatures (betas), in decreasing beta
    order, are pushed to the worker packages or stored on the manager.
    """
    if t_scale is None:
        t_scale = self.current_scale

    self.current_scale = t_scale
    betas_post = [1. for _ in range(self.n_workers_posterior)]
    temperature = num.power(
        t_scale, num.arange(1, self.n_workers_tempered + 1))
    betas_temp = (1. / temperature).tolist()
    betas = betas_post + betas_temp
    logger.info('Updated betas: %s', list2string(betas))

    if len(self._worker_package_mapping) > 0:
        # updating worker packages
        self._betas = None
        for beta, package in zip(
                betas, self._worker_package_mapping.values()):
            package['step'].beta = beta

        # reset worker process check
        self._worker_update_check = num.zeros(
            self.n_workers, dtype='bool')
    else:
        self._betas = betas
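
# A minimal sketch of the annealing schedule built above, assuming numpy
# only: posterior workers sample at beta = 1, tempered workers at
# beta = 1 / t_scale**k for k = 1..n_tempered.
import numpy as np

def geometric_betas(n_posterior, n_tempered, t_scale=10.):
    betas_post = [1. for _ in range(n_posterior)]
    temperature = np.power(t_scale, np.arange(1, n_tempered + 1))
    return betas_post + (1. / temperature).tolist()

geometric_betas(2, 3)   # -> [1.0, 1.0, 0.1, 0.01, 0.001]
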
def load_and_blacklist_gnss(
        datadir, filename, blacklist, campaign=False,
        components=['north', 'east', 'up']):
    """
    Load ascii GNSS data from GLOBK, apply blacklist and initialise
    targets.

    Parameters
    ----------
    datadir : string
        path to the directory
    filename : string
        filename to load
    blacklist : list
        of strings with station names to blacklist
    campaign : boolean
        if True return gnss.GNSSCampaign, otherwise list of
        heart.GNSSCompoundComponent
    components : tuple
        of strings ('north', 'east', 'up') for displacement components
        to return
    """
    gnss_campaign = load_ascii_gnss_globk(datadir, filename, components)
    if gnss_campaign:
        for station_code in blacklist:
            logger.debug(
                'Removing station %s from campaign.' % station_code)
            gnss_campaign.remove_station(station_code)

        station_names = [
            station.code for station in gnss_campaign.stations]
        logger.info(
            'Loaded data of %i GNSS stations' % len(station_names))
        logger.debug(
            'Loaded GNSS stations %s' %
            utility.list2string(station_names))

        if not campaign:
            return heart.GNSSCompoundComponent.from_pyrocko_gnss_campaign(
                gnss_campaign, components=components)
        else:
            return gnss_campaign
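
# Usage sketch (directory, filename and station code are hypothetical):
# gnss_targets = load_and_blacklist_gnss(
#     '/data/gnss', 'globk_solution.txt', blacklist=['ABCD'],
#     campaign=False, components=['north', 'east'])
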
def do_variance_estimate(self, wmap):
    filterer = wmap.config.filterer
    scalings = []
    for i, (tr, target) in enumerate(zip(wmap.datasets, wmap.targets)):
        wavename = 'any_P'   # hardcoded, always want the pre-P time
        arrival_time = heart.get_phase_arrival_time(
            engine=self.engine, source=self.event,
            target=target, wavename=wavename)

        if arrival_time < tr.tmin:
            logger.warning(
                'no data for variance estimation on pre-P arrival'
                ' in wavemap %s, for trace %s!' % (
                    wmap._mapid, list2string(tr.nslc_id)))
            logger.info(
                'Using reference arrival "%s" instead!' % wmap.name)
            arrival_time = heart.get_phase_arrival_time(
                engine=self.engine, source=self.event,
                target=target, wavename=wmap.name)

        ctrace = tr.copy()   # was undefined if no filterer configured
        if filterer is not None:
            ctrace.bandpass(
                corner_hp=filterer.lower_corner,
                corner_lp=filterer.upper_corner,
                order=filterer.order)

        ctrace = ctrace.chop(
            tmin=tr.tmin,
            tmax=arrival_time - self.pre_arrival_time)

        scaling = num.var(ctrace.get_ydata())
        scalings.append(scaling)
    return scalings
def get_synthetics(self, point, **kwargs):
    """
    Get synthetics for given point in solution space.

    Parameters
    ----------
    point : :func:`pymc3.Point`
        Dictionary with model parameters
    kwargs especially to change output of the forward model

    Returns
    -------
    list with :class:`heart.SeismicDataset` synthetics for each target
    """
    from time import time

    outmode = kwargs.pop('outmode', 'stacked_traces')

    # GF library cut in between [b, c]; no [a, d] possible
    chop_bounds = ['b', 'c']
    order = kwargs.pop('order', 'list')

    ref_idx = self.config.gf_config.reference_model_idx
    if len(self.gfs) == 0:
        self.load_gfs(crust_inds=[ref_idx], make_shared=False)

    for gfs in self.gfs.values():
        gfs.set_stack_mode('numpy')

    tpoint = copy.deepcopy(point)

    hps = self.config.get_hypernames()
    for hyper in hps:
        if hyper in tpoint:
            tpoint.pop(hyper)

    starttimes0 = num.zeros((self.fault.npatches), dtype=tconfig.floatX)
    for index in range(self.fault.nsubfaults):
        starttimes_tmp = self.fault.point2starttimes(
            tpoint, index=index).ravel()

        sf_patch_indexs = self.fault.cum_subfault_npatches[index:index + 2]
        starttimes0[sf_patch_indexs[0]:sf_patch_indexs[1]] = starttimes_tmp

    synth_traces = []
    obs_traces = []
    for wmap in self.wavemaps:
        wc = wmap.config

        starttimes = num.tile(
            starttimes0, wmap.n_t).reshape(wmap.n_t, self.fault.npatches)

        # station corrections
        if self.config.station_corrections:
            logger.debug(
                'Applying station corrections '
                'for wmap {}'.format(wmap.name))
            try:
                corrections = point[wmap.time_shifts_id]
            except KeyError:    # got reference point from config
                corrections = float(point[self.correction_name]) * \
                    num.ones(wmap.n_t)

            starttimes -= num.repeat(
                corrections[wmap.station_correction_idxs],
                self.fault.npatches).reshape(
                    wmap.n_t, self.fault.npatches)

        # TODO check targetidxs if station blacklisted!?
        targetidxs = num.atleast_2d(num.arange(wmap.n_t)).T
        synthetics = num.zeros(
            (wmap.n_t,
             wc.arrival_taper.nsamples(
                 self.config.gf_config.sample_rate)))
        for var in self.slip_varnames:
            key = self.get_gflibrary_key(
                crust_ind=ref_idx, wavename=wmap.name, component=var)
            try:
                gflibrary = self.gfs[key]
            except KeyError:
                raise KeyError(
                    'GF library %s not loaded! Loaded GFs: %s' % (
                        key, utility.list2string(self.gfs.keys())))

            gflibrary.set_stack_mode('numpy')
            t0 = time()
            synthetics += gflibrary.stack_all(
                targetidxs=targetidxs,
                starttimes=starttimes,
                durations=tpoint['durations'],
                slips=tpoint[var],
                interpolation=wc.interpolation)
            t1 = time()
            logger.debug(
                '{} seconds to stack {}'.format((t1 - t0), wmap.name))

        wmap_synthetics = []
        for i, target in enumerate(wmap.targets):
            tr = Trace(
                ydata=synthetics[i, :],
                tmin=float(gflibrary.reference_times[i]),
                deltat=gflibrary.deltat)

            tr.set_codes(*target.codes)
            if outmode == 'tapered_data':
                # TODO subfault-individual synthetics (use patchidxs arg)
                tr = [tr]

            wmap_synthetics.append(tr)

        wmap.prepare_data(
            source=self.events[wc.event_idx],
            engine=self.engine,
            outmode=outmode,
            chop_bounds=chop_bounds)

        if order == 'list':
            synth_traces.extend(wmap_synthetics)
            obs_traces.extend(wmap._prepared_data)

        elif order == 'wmap':
            synth_traces.append(wmap_synthetics)
            obs_traces.append(wmap._prepared_data)

        else:
            raise ValueError('Order "%s" is not supported' % order)

    return synth_traces, obs_traces
def lsq_solution(self, point, plot=False):
    """
    Returns non-negative least-squares solution for given input point.

    Parameters
    ----------
    point : dict
        in solution space

    Returns
    -------
    point with least-squares solution
    """
    from scipy.optimize import nnls

    if self.config.problem_config.mode_config.regularization != \
            'laplacian':
        raise ValueError(
            'Least-squares solution for distributed slip is only '
            'available with laplacian regularization!')

    lc = self.composites['laplacian']
    slip_varnames_candidates = ['uparr', 'utens']

    slip_varnames = []
    for var in slip_varnames_candidates:
        if var in self.varnames:
            slip_varnames.append(var)

    if len(slip_varnames) == 0:
        raise ValueError(
            'LSQ distributed slip solution is only available for %s,'
            ' which were fixed in the setup!' %
            list2string(slip_varnames_candidates))

    Gs = []
    ds = []
    for datatype, composite in self.composites.items():
        if datatype == 'geodetic':
            crust_ind = composite.config.gf_config.reference_model_idx
            keys = [
                composite.get_gflibrary_key(
                    crust_ind=crust_ind, wavename='static',
                    component=var)
                for var in slip_varnames]
            Gs.extend([composite.gfs[key]._gfmatrix for key in keys])

            # removing hierarchicals from data
            displacements = []
            for dataset in composite.datasets:
                displacements.append(copy.deepcopy(dataset.displacement))

            displacements = composite.apply_corrections(
                displacements, point=point, operation='-')
            ds.extend(displacements)

        elif datatype == 'seismic':
            if False:   # seismic contribution disabled for now
                for wmap in composite.wavemaps:
                    keys = [
                        composite.get_gflibrary_key(
                            crust_ind=crust_ind,
                            wavename=wmap.name,
                            component=var)
                        for var in slip_varnames]
                    Gs.extend(
                        [composite.gfs[key]._gfmatrix for key in keys])
                    ds.append(wmap._prepared_data)

    if len(Gs) == 0:
        raise ValueError(
            'No Greens Function matrix available!'
            ' (needs geodetic datatype!)')

    G = num.vstack(Gs)
    D = num.vstack(
        [lc.smoothing_op for sv in slip_varnames]) * \
        point[bconfig.hyper_name_laplacian] ** 2.

    dzero = num.zeros(D.shape[1], dtype=tconfig.floatX)
    A = num.hstack([G, D])
    d = num.hstack(ds + [dzero])

    # m, rmse, rankA, singularsA = num.linalg.lstsq(A.T, d, rcond=None)
    m, res = nnls(A.T, d)
    npatches = self.config.problem_config.mode_config.npatches
    for i, var in enumerate(slip_varnames):
        point[var] = m[i * npatches:(i + 1) * npatches]

    if plot:
        from beat.plotting import source_geometry
        gc = self.composites['geodetic']
        fault = gc.load_fault_geometry()
        source_geometry(
            fault, list(fault.iter_subfaults()),
            event=gc.event, values=point[slip_varnames[0]],
            title='slip [m]',
            datasets=gc.datasets)

    point['uperp'] = dzero
    return point
def get_synthetics(self, point, **kwargs):
    """
    Get synthetics for given point in solution space.

    Parameters
    ----------
    point : :func:`pymc3.Point`
        Dictionary with model parameters
    kwargs especially to change output of the forward model

    Returns
    -------
    list with :class:`heart.SeismicDataset` synthetics for each target
    """
    outmode = kwargs.pop('outmode', 'stacked_traces')

    # GF library cut in between [b, c]; no [a, d] possible
    chop_bounds = ['b', 'c']
    order = kwargs.pop('order', 'list')

    ref_idx = self.config.gf_config.reference_model_idx
    if len(self.gfs) == 0:
        self.load_gfs(crust_inds=[ref_idx], make_shared=False)

    for gfs in self.gfs.values():
        gfs.set_stack_mode('numpy')

    tpoint = copy.deepcopy(point)

    hps = self.config.get_hypernames()
    for hyper in hps:
        if hyper in tpoint:
            tpoint.pop(hyper)

    # TODO make nsubfaults ready
    starttimes0 = self.fault.point2starttimes(tpoint, index=0).ravel()
    starttimes0 += point['time']

    # station corrections
    if len(self.hierarchicals) > 0:
        raise NotImplementedError(
            'Station corrections not fully implemented for FFO!')
        # starttimes = (
        #     num.tile(starttimes0, wmap.n_t) +
        #     num.repeat(self.hierarchicals[wmap.time_shifts_id][
        #         wmap.station_correction_idxs],
        #         self.fault.npatches)).reshape(
        #             wmap.n_t, self.fault.npatches)
        #
        # targetidxs = num.atleast_2d(num.arange(wmap.n_t)).T
    else:
        starttimes = starttimes0
        targetidxs = num.lib.index_tricks.s_[:]

    # obsolete, from variable obs data:
    # patchidx = self.fault.patchmap(
    #     index=0, dipidx=nuc_dip_idx, strikeidx=nuc_strike_idx)

    synth_traces = []
    obs_traces = []
    for wmap in self.wavemaps:
        synthetics = num.zeros(
            (wmap.n_t,
             wmap.config.arrival_taper.nsamples(
                 self.config.gf_config.sample_rate)))
        for var in self.slip_varnames:
            key = self.get_gflibrary_key(
                crust_ind=ref_idx, wavename=wmap.name, component=var)
            try:
                gflibrary = self.gfs[key]
            except KeyError:
                raise KeyError(
                    'GF library %s not loaded! Loaded GFs: %s' % (
                        key, utility.list2string(self.gfs.keys())))

            gflibrary.set_stack_mode('numpy')
            synthetics += gflibrary.stack_all(
                targetidxs=targetidxs,
                starttimes=starttimes,
                durations=tpoint['durations'],
                slips=tpoint[var],
                interpolation=wmap.config.interpolation)

        wmap_synthetics = []
        for i, target in enumerate(wmap.targets):
            tr = Trace(
                ydata=synthetics[i, :],
                tmin=float(gflibrary.reference_times[i]),
                deltat=gflibrary.deltat)

            tr.set_codes(*target.codes)
            wmap_synthetics.append(tr)

        wmap.prepare_data(
            source=self.event,
            engine=self.engine,
            outmode=outmode,
            chop_bounds=chop_bounds)

        if order == 'list':
            synth_traces.extend(wmap_synthetics)
            obs_traces.extend(wmap._prepared_data)

        elif order == 'wmap':
            synth_traces.append(wmap_synthetics)
            obs_traces.append(wmap._prepared_data)

        else:
            raise ValueError('Order "%s" is not supported' % order)

    return synth_traces, obs_traces
def init_hyperparams(self):
    """
    Evaluate problem setup and return hyperparameter dictionary.
    """
    pc = self.config.problem_config
    hyperparameters = copy.deepcopy(pc.hyperparameters)

    hyperparams = {}
    n_hyp = 0
    modelinit = True
    self._hypernames = []
    for datatype, composite in self.composites.items():
        hypernames = composite.get_hypernames()

        for hp_name in hypernames:
            if hp_name in hyperparameters.keys():
                hyperpar = hyperparameters.pop(hp_name)
                if composite.config:   # only data composites
                    if composite.config.dataset_specific_residual_noise_estimation:
                        if datatype == 'seismic':
                            wmap = composite.hyper2wavemap(hp_name)
                            ndata = wmap.hypersize
                        else:
                            ndata = len(
                                composite.get_all_station_names())
                    else:
                        ndata = 1
                else:
                    ndata = 1
            else:
                raise InconsistentNumberHyperparametersError(
                    'Datasets and -types require additional'
                    ' hyperparameter(s): %s!' % hp_name)

            if not num.array_equal(hyperpar.lower, hyperpar.upper):
                dimension = hyperpar.dimension * ndata

                kwargs = dict(
                    name=hyperpar.name,
                    shape=dimension,
                    lower=num.repeat(hyperpar.lower, ndata),
                    upper=num.repeat(hyperpar.upper, ndata),
                    testval=num.repeat(hyperpar.testvalue, ndata),
                    dtype=tconfig.floatX,
                    transform=None)

                try:
                    hyperparams[hp_name] = Uniform(**kwargs)
                except TypeError:
                    kwargs.pop('name')
                    hyperparams[hp_name] = Uniform.dist(**kwargs)
                    modelinit = False

                n_hyp += dimension
                self._hypernames.append(hyperpar.name)
            else:
                logger.info(
                    'not solving for %s, got fixed at %s' % (
                        hyperpar.name,
                        list2string(hyperpar.lower.flatten())))
                hyperparams[hyperpar.name] = hyperpar.lower

    if len(hyperparameters) > 0:
        raise InconsistentNumberHyperparametersError(
            'There are hyperparameters in the config file which are not'
            ' covered by datasets/datatypes.')

    if modelinit:
        logger.info(
            'Optimization for %i hyperparameters in total!', n_hyp)

    self.hyperparams = hyperparams
def seis_construct_gf_linear(
        engine, fault, durations_prior, velocities_prior,
        nucleation_time_prior, varnames, wavemap, event,
        nworkers=1, starttime_sampling=1., duration_sampling=1.,
        sample_rate=1., outdirectory='./', force=False):
    """
    Create seismic Greens Function matrix for defined source geometry
    by convolution of the GFs with the source time function (STF).

    Parameters
    ----------
    engine : :class:`pyrocko.gf.seismosizer.LocalEngine`
        main path to directory containing the different Greens Function
        stores
    wavemap : :class:`heart.WaveformMapping`
        configuration parameters for handling seismic data around Phase
    fault : :class:`FaultGeometry`
        fault object that may comprise several sub-faults, thus forming
        a complex fault geometry
    durations_prior : :class:`heart.Parameter`
        prior of durations of the STF for each patch to convolve
    velocities_prior : :class:`heart.Parameter`
        prior of the rupture velocity of the earthquake
    nucleation_time_prior : :class:`heart.Parameter`
        prior of the nucleation time of the event
    starttime_sampling : float
        incremental step size for precalculation of starttime GFs
    duration_sampling : float
        incremental step size for precalculation of duration GFs
    sample_rate : float
        sample rate of synthetic traces to produce, related to
        non-linear GF store
    nworkers : int
        number of processes to use in parallel
    outdirectory : str
        directory for storage
    force : boolean
        flag to overwrite existing linear GF Library
    """
    # get starttimes for hypocenter at corner of fault
    # TODO: make nsubfaults compatible
    npw, npl = fault.get_subfault_discretization(0)
    start_times = fault.get_subfault_starttimes(
        index=0,
        rupture_velocities=velocities_prior.lower.repeat(npw * npl),
        nuc_dip_idx=0,
        nuc_strike_idx=0)

    starttimeidxs = num.arange(
        int(num.floor(
            start_times.min() + nucleation_time_prior.lower.min()) /
            starttime_sampling),
        int(num.ceil(
            start_times.max() + nucleation_time_prior.upper.max()) /
            starttime_sampling) + 1)
    starttimes = starttimeidxs * starttime_sampling

    ndurations = error_not_whole((
        (durations_prior.upper.max() -
         durations_prior.lower.min()) / duration_sampling),
        errstr='ndurations') + 1

    durations = num.linspace(
        durations_prior.lower.min(),
        durations_prior.upper.max(),
        ndurations)

    logger.info(
        'Calculating GFs for starttimes: %s\n durations: %s' % (
            list2string(starttimes), list2string(durations)))
    logger.info('Using %i workers ...' % nworkers)

    nstarttimes = len(starttimes)
    npatches = fault.npatches
    ntargets = len(wavemap.targets)
    nsamples = wavemap.config.arrival_taper.nsamples(sample_rate)

    for var in varnames:
        logger.info('For slip component: %s' % var)

        gfl_config = SeismicGFLibraryConfig(
            component=var,
            datatype='seismic',
            event=event,
            reference_sources=fault.get_all_subfaults(
                datatype='seismic', component=var),
            duration_sampling=duration_sampling,
            starttime_sampling=starttime_sampling,
            wave_config=wavemap.config,
            dimensions=(
                ntargets, npatches, ndurations, nstarttimes, nsamples),
            starttime_min=float(starttimes.min()),
            duration_min=float(durations.min()))

        gfs = SeismicGFLibrary(config=gfl_config)

        outpath = os.path.join(outdirectory, gfs.filename + '.npz')
        if os.path.exists(outpath) and not force:
            logger.info(
                'Library exists: %s. Please use --force to'
                ' override!' % outpath)
        else:
            if nworkers < 2:
                allocate = True
            else:
                allocate = False

            gfs.setup(
                ntargets, npatches, ndurations,
                nstarttimes, nsamples, allocate=allocate)

            logger.info(
                "Setting up Green's Function Library: %s\n",
                gfs.__str__())

            parallel.check_available_memory(gfs.filesize)

            shared_gflibrary = RawArray('d', gfs.size)
            shared_times = RawArray('d', gfs.ntargets)

            work = [
                (engine, gfs, wavemap.targets,
                 patch, patchidx, durations, starttimes)
                for patchidx, patch in enumerate(
                    fault.get_all_patches('seismic', component=var))]

            p = parallel.paripool(
                _process_patch_seismic, work,
                initializer=_init_shared,
                initargs=(shared_gflibrary, shared_times),
                nprocs=nworkers)

            for res in p:
                pass

            # collect and store away
            gfs._gfmatrix = num.frombuffer(
                shared_gflibrary).reshape(gfs.dimensions)
            gfs._tmins = num.frombuffer(shared_times).reshape(
                (gfs.ntargets))

            logger.info('Storing seismic linear GF Library ...')

            gfs.save(outdir=outdirectory)
            del gfs
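
# A minimal sketch of the grid definition above, assuming numpy: start
# times and durations are discretized on regular grids spanning the
# prior bounds, so any sampled value can later be interpolated between
# precomputed GF traces.
import numpy as np

duration_sampling = 0.5
lower, upper = 1.0, 4.0                           # stand-in prior bounds
ndurations = int((upper - lower) / duration_sampling) + 1
durations = np.linspace(lower, upper, ndurations)
# -> [1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0]
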
def master_process(
        comm, tags, status, model, step, n_samples, swap_interval,
        beta_tune_interval, n_workers_posterior, homepath, progressbar,
        buffer_size, resample, rm_flag):
    """
    Master process that does the managing.
    Sends tasks to workers.
    Collects results and writes them to the trace.
    Fires workers once the job is done.

    Parameters
    ----------
    comm : mpi.communicator
    tags : message tags
    status : mpi.status object

    for the rest see the pt_sample doc-string
    """
    size = comm.size        # total number of processes
    n_workers = size - 1

    if n_workers_posterior >= n_workers:
        raise ValueError(
            'Specified more workers to sample the posterior than there'
            ' are workers in total!')

    stage = -1
    active_workers = 0
    steps_until_tune = 0

    # start sampling of chains with given seed
    logger.info('Master starting with %d workers' % n_workers)
    logger.info('Packing stuff for workers')
    manager = TemperingManager(
        step=step,
        n_workers=n_workers,
        n_workers_posterior=n_workers_posterior,
        model=model,
        progressbar=progressbar,
        buffer_size=buffer_size,
        swap_interval=swap_interval,
        beta_tune_interval=beta_tune_interval)

    stage_handler = TextStage(homepath)
    stage_handler.clean_directory(stage, chains=None, rm_flag=rm_flag)

    logger.info('Initializing result trace...')
    logger.info(
        'Writing samples to file every %i samples.' % buffer_size)

    trace = TextChain(
        name=stage_handler.stage_path(stage),
        model=model,
        buffer_size=buffer_size,
        progressbar=progressbar)
    trace.setup(n_samples, 0, overwrite=False)
    # TODO load starting points from existing trace

    logger.info('Sending work packages to workers...')
    manager.update_betas()
    for beta in manager.betas:
        comm.recv(source=MPI.ANY_SOURCE, tag=tags.READY, status=status)
        source = status.Get_source()

        package = manager.get_package(source, resample=resample)
        comm.send(package, dest=source, tag=tags.INIT)
        logger.debug('Sent work package to worker %i' % source)
        active_workers += 1

    count_sample = 0
    counter = ChainCounter(
        n=n_samples, n_jobs=1, perc_disp=0.01, subject='samples')
    logger.info(
        'Posterior workers %s', list2string(manager.posterior_workers))
    logger.info(
        'Tuning worker betas every %i samples.\n' % beta_tune_interval)
    logger.info('Sampling ...')
    logger.info('------------')
    while True:
        m1 = num.empty(manager.step.lordering.size)
        comm.Recv(
            [m1, MPI.DOUBLE],
            source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        source1 = status.Get_source()
        logger.debug('Got sample 1 from worker %i' % source1)

        m2 = num.empty(manager.step.lordering.size)
        comm.Recv(
            [m2, MPI.DOUBLE],
            source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        source2 = status.Get_source()
        logger.debug('Got sample 2 from worker %i' % source2)

        # write results to trace if workers sample from posterior
        for source, m in zip([source1, source2], [m1, m2]):
            if source in manager.posterior_workers:
                count_sample += 1
                counter(source)
                trace.write(m, count_sample)

        steps_until_tune += 1

        m1, m2 = manager.propose_chain_swap(m1, m2, source1, source2)

        # beta updating
        if steps_until_tune >= beta_tune_interval:
            manager.tune_betas()
            steps_until_tune = 0

        if count_sample < n_samples:
            logger.debug('Sending states back to workers ...')
            for source in [source1, source2]:
                # was checking source1 for both workers
                if not manager.worker_beta_updated(source):
                    comm.Send(
                        [manager.get_beta(source), MPI.DOUBLE],
                        dest=source, tag=tags.BETA)
                    manager.worker_beta_updated(source, check=True)

            comm.Send(m1, dest=source1, tag=tags.SAMPLE)
            comm.Send(m2, dest=source2, tag=tags.SAMPLE)
        else:
            logger.info('Requested number of samples reached!')
            trace.record_buffer()
            manager.dump_history(
                save_dir=stage_handler.stage_path(stage))
            break

    logger.info('Master finished! Chain complete!')
    logger.debug('Firing ...')
    for i in range(1, size):
        logger.debug('Sending pay cheque to %i' % i)
        comm.send(None, dest=i, tag=tags.EXIT)
        logger.debug('Fired worker %i' % i)
        active_workers -= 1

    logger.info('Feierabend! Sampling finished!')
def init_hierarchicals(self, problem_config):
    """
    Initialize hierarchical parameters.
    Ramp estimation in azimuth and range direction of a radar scene
    and/or rotation of GNSS stations around an Euler pole.
    """
    hierarchicals = problem_config.hierarchicals
    self._hierarchicalnames = []
    for number, corr in enumerate(
            self.config.corrections_config.iter_corrections()):
        logger.info(
            'Evaluating config for %s corrections'
            ' for datasets...' % corr.feature)
        if corr.enabled:
            for data in self.datasets:
                if data.name in corr.dataset_names:
                    hierarchical_names = corr.get_hierarchical_names(
                        name=data.name, number=number)
                else:
                    hierarchical_names = []

                for hierarchical_name in hierarchical_names:
                    if not corr.enabled and \
                            hierarchical_name in hierarchicals:
                        raise ConfigInconsistentError(
                            '%s %s disabled, but they are defined'
                            ' in the problem configuration'
                            ' (hierarchicals)!' % (
                                corr.feature, data.name))

                    if corr.enabled and \
                            hierarchical_name not in hierarchicals \
                            and data.name in corr.dataset_names:
                        raise ConfigInconsistentError(
                            '%s %s corrections enabled, but they are'
                            ' not defined in the problem'
                            ' configuration! (hierarchicals)' % (
                                corr.feature, data.name))

                    param = hierarchicals[hierarchical_name]
                    if hierarchical_name not in self.hierarchicals:
                        if not num.array_equal(
                                param.lower, param.upper):
                            kwargs = dict(
                                name=param.name,
                                shape=param.dimension,
                                lower=param.lower,
                                upper=param.upper,
                                testval=param.testvalue,
                                transform=None,
                                dtype=tconfig.floatX)

                            try:
                                self.hierarchicals[
                                    hierarchical_name] = Uniform(
                                        **kwargs)
                            except TypeError:
                                kwargs.pop('name')
                                self.hierarchicals[
                                    hierarchical_name] = Uniform.dist(
                                        **kwargs)

                            self._hierarchicalnames.append(
                                hierarchical_name)
                        else:
                            logger.info(
                                'not solving for %s, got fixed at %s' % (
                                    param.name,
                                    utility.list2string(
                                        param.lower.flatten())))
                            self.hierarchicals[
                                hierarchical_name] = param.lower
        else:
            logger.info('No %s correction!' % corr.feature)

    logger.info(
        'Initialized %i hierarchical parameters.' %
        len(self.hierarchicals))
def iter_parallel_chains(
        draws, step, stage_path, progressbar, model, n_jobs,
        chains=None, initializer=None, initargs=(), chunksize=None):
    """
    Do Metropolis sampling over all the chains, with each chain being
    sampled 'draws' times. Parallel execution according to n_jobs.
    If jobs hang for any reason, they are killed after an estimated
    timeout. The chains in question are rerun and the estimated
    timeout is added again.

    Parameters
    ----------
    draws : int
        number of steps that are taken within each Markov Chain
    step : step object of the sampler class, e.g.:
        :class:`beat.sampler.Metropolis`, :class:`beat.sampler.SMC`
    stage_path : str
        with absolute path to the directory where to store the
        sampling results
    progressbar : boolean
        flag for displaying a progressbar
    model : :class:`pymc3.model.Model` instance
        holds definition of the forward problem
    n_jobs : int
        number of jobs to run in parallel, must not be higher than the
        number of CPUs
    chains : list
        of integers to the chain numbers, if None then all chains from
        the step object are sampled
    initializer : function
        to run before execution of each sampling process
    initargs : tuple
        of arguments for the initializer
    chunksize : int
        number of chains to sample within each process

    Returns
    -------
    MultiTrace object
    """
    timeout = 0

    if chains is None:
        chains = list(range(step.n_chains))

    n_chains = len(chains)

    if n_chains == 0:
        mtrace = backend.load_multitrace(dirname=stage_path, model=model)

    # while is necessary if any worker times out - rerun in case
    while n_chains > 0:
        trace_list = []

        logger.info('Initialising %i chain traces ...' % n_chains)
        for chain in chains:
            trace_list.append(backend.TextChain(stage_path, model=model))

        max_int = np.iinfo(np.int32).max
        random_seeds = [randint(max_int) for _ in range(n_chains)]

        work = [
            (draws, step, step.population[step.resampling_indexes[chain]],
             trace, chain, None, progressbar, model, rseed)
            for chain, rseed, trace in zip(
                chains, random_seeds, trace_list)]

        tps = step.time_per_sample(np.minimum(n_jobs, 10))
        logger.info('Serial time per sample: %f' % tps)

        if chunksize is None:
            if draws < 10:
                chunksize = int(np.ceil(float(n_chains) / n_jobs))
            elif draws > 10 and tps < 1.:
                chunksize = int(np.ceil(float(n_chains) / n_jobs))
            else:
                chunksize = n_jobs

        timeout += int(np.ceil(tps * draws)) * n_jobs + 10

        if n_jobs > 1:
            shared_params = [
                sparam for sparam in step.logp_forw.get_shared()
                if sparam.name in parallel._tobememshared]

            logger.info(
                'Data to be memory shared: %s' %
                list2string(shared_params))

            if len(shared_params) > 0:
                if len(parallel._shared_memory.keys()) == 0:
                    logger.info('Putting data into shared memory ...')
                    parallel.memshare_sparams(shared_params)
                else:
                    logger.info('Data already in shared memory!')
            else:
                logger.info('No data to be memshared!')
        else:
            logger.info('Not using shared memory.')

        p = parallel.paripool(
            _sample, work,
            chunksize=chunksize,
            timeout=timeout,
            nprocs=n_jobs,
            initializer=initializer,
            initargs=initargs)

        logger.info('Sampling ...')

        for res in p:
            pass

        # return chain indexes that have been corrupted
        mtrace = backend.load_multitrace(dirname=stage_path, model=model)
        corrupted_chains = backend.check_multitrace(
            mtrace, draws=draws, n_chains=step.n_chains)

        n_chains = len(corrupted_chains)

        if n_chains > 0:
            logger.warning(
                '%i chains not finished sampling, restarting ...' %
                n_chains)

        chains = corrupted_chains

    return mtrace
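
# Sketch of the chunking heuristic above: cheap chains (few draws, or
# fast per-sample time) are grouped so every process receives roughly
# equal work in one go; otherwise chunks of size n_jobs are handed out.
import numpy as np

def choose_chunksize(n_chains, n_jobs, draws, tps):
    if draws < 10 or (draws > 10 and tps < 1.):
        return int(np.ceil(float(n_chains) / n_jobs))
    return n_jobs

choose_chunksize(n_chains=100, n_jobs=4, draws=5, tps=0.1)   # -> 25
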
def get_synthetics(self, point, **kwargs):
    """
    Get synthetics for given point in solution space.

    Parameters
    ----------
    point : :func:`pymc3.Point`
        Dictionary with model parameters
    kwargs especially to change output of the forward model

    Returns
    -------
    list with :class:`heart.SeismicDataset` synthetics for each target
    """
    order = kwargs.pop('order', 'list')

    ref_idx = self.config.gf_config.reference_model_idx
    if len(self.gfs.keys()) == 0:
        self.load_gfs(crust_inds=[ref_idx], make_shared=False)

    tpoint = copy.deepcopy(point)

    hps = self.config.get_hypernames()
    for hyper in hps:
        if hyper in tpoint:
            tpoint.pop(hyper)

    nuc_dip_idx, nuc_strike_idx = self.fault.fault_locations2idxs(
        positions_dip=tpoint['nucleation_dip'],
        positions_strike=tpoint['nucleation_strike'],
        backend='numpy')

    starttimes = self.fault.get_subfault_starttimes(
        index=0,
        rupture_velocities=tpoint['velocities'],
        nuc_dip_idx=nuc_dip_idx,
        nuc_strike_idx=nuc_strike_idx).flatten()

    patchidx = self.fault.patchmap(
        index=0, dipidx=nuc_dip_idx, strikeidx=nuc_strike_idx)

    synth_traces = []
    obs_traces = []
    for wmap in self.wavemaps:
        synthetics = num.zeros(
            (wmap.n_t,
             wmap.config.arrival_taper.nsamples(
                 self.config.gf_config.sample_rate)))
        for var in self.slip_varnames:
            key = self.get_gflibrary_key(
                crust_ind=ref_idx, wavename=wmap.name, component=var)
            try:
                gflibrary = self.gfs[key]
            except KeyError:
                raise KeyError(
                    'GF library %s not loaded! Loaded GFs: %s' % (
                        key, utility.list2string(self.gfs.keys())))

            gflibrary.set_stack_mode('numpy')
            synthetics += gflibrary.stack_all(
                starttimes=starttimes,
                durations=tpoint['durations'],
                slips=tpoint[var],
                interpolation=wmap.config.interpolation)

        wmap_synthetics = []
        for i, target in enumerate(wmap.targets):
            tr = Trace(
                ydata=synthetics[i, :],
                tmin=float(
                    gflibrary.reference_times[i] +
                    tpoint['nucleation_time']),
                deltat=gflibrary.deltat)

            tr.set_codes(*target.codes)
            wmap_synthetics.append(tr)

        if self.config.station_corrections:
            sh = point[
                self.correction_name][wmap.station_correction_idxs]

            # shift the current wavemap's synthetics, not the
            # already-collected ones
            for i, tr in enumerate(wmap_synthetics):
                tr.tmin += sh[i]
                tr.tmax += sh[i]

        wmap_obs = heart.taper_filter_traces(
            wmap.datasets,
            arrival_taper=wmap.config.arrival_taper,
            filterer=wmap.config.filterer,
            tmins=(gflibrary.get_all_tmins(patchidx)),
            **kwargs)

        if order == 'list':
            synth_traces.extend(wmap_synthetics)
            obs_traces.extend(wmap_obs)

        elif order == 'wmap':
            synth_traces.append(wmap_synthetics)
            obs_traces.append(wmap_obs)

        else:
            raise ValueError('Order "%s" is not supported' % order)

    return synth_traces, obs_traces
def get_variance_reductions(
        self, point, results=None, weights=None,
        chop_bounds=['a', 'd']):
    """
    Parameters
    ----------
    point : dict
        with parameters, point in solution space to calculate
        variance reductions for

    Returns
    -------
    dict of floats, keys are nslc_ids
    """
    if results is None:
        results = self.assemble_results(
            point, order='list', chop_bounds=chop_bounds)

    ndatasets = len(self.datasets)

    assert len(results) == ndatasets

    if weights is None:
        self.analyse_noise(point, chop_bounds=chop_bounds)
        self.update_weights(point, chop_bounds=chop_bounds)
        weights = self.weights

    nweights = len(weights)
    logger.debug(
        'n weights %i, n datasets %i' % (nweights, ndatasets))
    assert nweights == ndatasets

    logger.debug('Calculating variance reduction for solution ...')

    var_reds = OrderedDict()
    for data_trc, weight, result in zip(
            self.datasets, weights, results):
        icov = data_trc.covariance.inverse

        data = result.processed_obs.get_ydata()
        residual = result.processed_res.get_ydata()

        nom = residual.T.dot(icov).dot(residual)
        denom = data.T.dot(icov).dot(data)

        logger.debug('nom %f, denom %f' % (float(nom), float(denom)))
        var_red = 1 - (nom / denom)

        nslc_id = utility.list2string(data_trc.nslc_id)
        logger.debug(
            'Variance reduction for %s is %f' % (nslc_id, var_red))

        if 0:
            # debugging: visualise the data covariance matrix
            from matplotlib import pyplot as plt
            fig, ax = plt.subplots(1, 1)
            im = ax.imshow(data_trc.covariance.data)
            plt.colorbar(im)
            plt.show()

        var_reds[nslc_id] = var_red

    return var_reds
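
# Worked sketch of the variance-reduction formula used above, assuming
# plain numpy arrays: VR = 1 - (r^T C^-1 r) / (d^T C^-1 d), with data d,
# residual r and inverse data covariance C^-1.
import numpy as np

def variance_reduction(data, residual, icov):
    nom = residual.T.dot(icov).dot(residual)
    denom = data.T.dot(icov).dot(data)
    return 1. - nom / denom

d = np.array([1., 2., 3.])
variance_reduction(d, residual=np.zeros(3), icov=np.eye(3))   # -> 1.0
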
def init_hierarchicals(self, problem_config):
    """
    Initialise random variables for temporal station corrections.
    """
    hierarchicals = problem_config.hierarchicals
    self._hierarchicalnames = []
    if not self.config.station_corrections and \
            self.correction_name in hierarchicals:
        raise ConfigInconsistentError(
            'Station corrections disabled, but they are defined'
            ' in the problem configuration!')

    if self.config.station_corrections and \
            self.correction_name not in hierarchicals:
        raise ConfigInconsistentError(
            'Station corrections enabled, but they are not defined'
            ' in the problem configuration!')

    if self.correction_name in hierarchicals:
        logger.info(
            'Estimating time shift for each station and waveform'
            ' map...')
        for wmap in self.wavemaps:
            hierarchical_name = wmap.time_shifts_id
            nhierarchs = len(wmap.get_station_names())

            logger.info(
                'For %s with %i shifts' % (
                    hierarchical_name, nhierarchs))

            if hierarchical_name in hierarchicals:
                logger.info(
                    'Using wavemap specific imported: %s' %
                    hierarchical_name)
                param = hierarchicals[hierarchical_name]
            else:
                logger.info('Using global %s' % self.correction_name)
                param = copy.deepcopy(
                    problem_config.hierarchicals[self.correction_name])
                param.lower = num.repeat(param.lower, nhierarchs)
                param.upper = num.repeat(param.upper, nhierarchs)
                param.testvalue = num.repeat(
                    param.testvalue, nhierarchs)

            if hierarchical_name not in self.hierarchicals:
                if not num.array_equal(param.lower, param.upper):
                    kwargs = dict(
                        name=hierarchical_name,
                        shape=param.dimension,
                        lower=param.lower,
                        upper=param.upper,
                        testval=param.testvalue,
                        transform=None,
                        dtype=tconfig.floatX)

                    try:
                        self.hierarchicals[
                            hierarchical_name] = Uniform(**kwargs)
                    except TypeError:
                        kwargs.pop('name')
                        self.hierarchicals[hierarchical_name] = \
                            Uniform.dist(**kwargs)

                    self._hierarchicalnames.append(hierarchical_name)
                else:
                    logger.info(
                        'not solving for %s, got fixed at %s' % (
                            param.name,
                            utility.list2string(
                                param.lower.flatten())))
                    self.hierarchicals[hierarchical_name] = param.lower