def process_args(self, args):
    self.progress.process_args(args)
    self.data_reader.process_args(args)
    with self.data_reader:
        self.iter_range.process_args(args)

    self.task_callable = get_object(args.task_callable, path=['.'])
    if args.crawler_instance is not None:
        self.crawler = get_object(args.crawler_instance, path=['.'])
    else:
        self.crawler = WESTPACrawler()
def bins_from_yaml_dict(bin_dict):
    kwargs = deepcopy(bin_dict)
    typename = kwargs.pop('type')
    try:
        mapper_type = getattr(sys.modules['westpa.core.binning'], typename)
    except AttributeError:
        try:
            mapper_type = get_object(typename)
        except AttributeError:
            raise KeyError('unknown bin mapper type {!r}'.format(typename))
    if not issubclass(mapper_type, BinMapper):
        raise ValueError('{} is not a BinMapper'.format(mapper_type.__name__))

    if mapper_type is RectilinearBinMapper:
        boundary_lists = kwargs.pop('boundaries', None)
        if boundary_lists is None:
            raise KeyError('RectilinearBinMapper: missing boundaries')
        parsed_lists = boundary_lists[:]
        for iboundary, boundary in enumerate(boundary_lists):
            if isinstance(boundary, str):
                parsed_lists[iboundary] = parsePCV(boundary)[0]
            else:
                # Convert string entries (e.g. 'inf') to floats
                parsed_lists[iboundary] = [float(x) if isinstance(x, str) else x for x in boundary]
        return RectilinearBinMapper(parsed_lists)
    elif mapper_type is RecursiveBinMapper:
        base_mapper_config = kwargs.pop('base', None)
        base_mapper_config = kwargs.pop('base_mapper', base_mapper_config)
        if base_mapper_config is None:
            raise KeyError('RecursiveBinMapper: missing base_mapper')
        base_mapper = bins_from_yaml_dict(base_mapper_config)
        start_index = kwargs.pop('start_index', 0)
        rec_mapper = RecursiveBinMapper(base_mapper, start_index)

        mapper_configs = kwargs.pop('mappers', None)
        mappers = []
        if mapper_configs is not None:
            for config in mapper_configs:
                replaced_bin = config.pop('replaces_bin_at', None)
                replaced_bin = config.pop('at', replaced_bin)
                if replaced_bin is None:
                    raise KeyError('RecursiveBinMapper: missing replaces_bin_at for at least one of the child mappers')
                mapper = bins_from_yaml_dict(config)
                mappers.append((mapper, replaced_bin))
        for mapper, replaced_bin in mappers:
            rec_mapper.add_mapper(mapper, replaced_bin)
        return rec_mapper
    else:
        try:
            return mapper_type(**kwargs)
        except Exception:
            log.exception('exception instantiating mapper')
            raise
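
# A minimal usage sketch for bins_from_yaml_dict (the helper name and the
# boundary values are illustrative, not from the original source). It builds
# a recursive scheme that replaces one bin of a coarse rectilinear base
# mapper with a finer one; note that string boundaries such as 'inf' are
# converted to floats by the parsing above.
def _example_recursive_scheme():
    scheme = {
        'type': 'RecursiveBinMapper',
        'base': {
            'type': 'RectilinearBinMapper',
            'boundaries': [[0.0, 1.0, 2.0, 'inf']],
        },
        'mappers': [
            {
                'type': 'RectilinearBinMapper',
                'boundaries': [[1.0, 1.25, 1.5, 1.75, 2.0]],
                'replaces_bin_at': [1.5],
            },
        ],
    }
    return bins_from_yaml_dict(scheme)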
def get_dfunc_method(self, plugin_config):
    try:
        methodname = plugin_config['dfunc_method']
    except KeyError:
        raise ConfigItemMissing('dfunc_method')

    dfunc_method = extloader.get_object(methodname)

    log.info('loaded stringmethod dfunc method {!r}'.format(dfunc_method))

    return dfunc_method
def load_plugins(self):
    try:
        plugins_config = westpa.rc.config['west', 'plugins']
    except KeyError:
        return

    for plugin_config in plugins_config or []:
        plugin_name = plugin_config['plugin']
        if plugin_config.get('enabled', True):
            log.info('loading plugin {!r}'.format(plugin_name))
            plugin = extloader.get_object(plugin_name)(self, plugin_config)
            log.debug('loaded plugin {!r}'.format(plugin))
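
# A minimal sketch of a plugin class as instantiated above (the class name
# and hook point are illustrative): plugins receive the sim manager and
# their own config dict, and typically register callbacks on sim manager
# hooks.
class _ExamplePlugin:
    def __init__(self, sim_manager, plugin_config):
        self.sim_manager = sim_manager
        self.priority = plugin_config.get('priority', 0)
        # Run after each weighted-ensemble resampling step
        sim_manager.register_callback(sim_manager.post_we, self.post_we, self.priority)

    def post_we(self):
        log.info('example plugin: iteration {} complete'.format(self.sim_manager.n_iter))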
def get_mapper_func(self, plugin_config):
    try:
        methodname = plugin_config['mapper_func']
    except KeyError:
        return False

    mapper_func = extloader.get_object(methodname)

    log.info('loaded adaptive voronoi mapper function {!r}'.format(mapper_func))

    return mapper_func
def process_args(self, args):
    self.progress.process_args(args)
    self.data_reader.process_args(args)
    with self.data_reader:
        self.iter_range.process_args(args)

    predicate = get_object(args.predicate_function, path=['.'])
    if not callable(predicate):
        raise TypeError('predicate object {!r} is not callable'.format(predicate))
    self.predicate = predicate
    self.invert = bool(args.invert)
    self.include_ancestors = bool(args.include_ancestors)
    self.output_filename = args.output
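
# A sketch of a predicate function as loaded above, following the w_select
# convention that it is called per iteration as predicate(n_iter, iter_group)
# and returns a boolean array with one entry per segment. The progress
# coordinate threshold is illustrative.
def _example_predicate(n_iter, iter_group):
    pcoord = iter_group['pcoord'][...]
    # Select segments whose final pcoord value exceeds the threshold
    return pcoord[:, -1, 0] > 5.0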
def process_args(self, args):
    if args.construct_dataset:
        self.dsspec = FnDSSpec(self.h5filename, get_object(args.construct_dataset, path=['.']))
    elif args.dsspecs:
        self.dsspec = MultiDSSpec(
            [SingleSegmentDSSpec.from_string(dsspec, self.h5filename) for dsspec in args.dsspecs]
        )
    else:
        # we can only get here if a default dataset name was specified
        assert self.default_dsname
        self.dsspec = SingleSegmentDSSpec(self.h5filename, self.default_dsname)
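
# A sketch of a --construct-dataset function as loaded above, assuming the
# usual convention that it is called as fn(n_iter, iter_group) and returns
# an array indexed as [segment, timepoint, dimension].
def _example_construct_dataset(n_iter, iter_group):
    # Use only the first progress coordinate dimension
    return iter_group['pcoord'][:, :, :1]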
def get_string_method(self, plugin_config):
    try:
        methodname = plugin_config['string_method']
    except KeyError:
        raise ConfigItemMissing('string_method')

    if methodname.lower() == 'default':
        str_method = DefaultStringMethod
    else:
        str_method = extloader.get_object(methodname)

    assert issubclass(str_method, WESTStringMethod)

    log.debug('loaded stringmethod string method {!r}'.format(str_method))

    return str_method
def get_avgpos_method(self, plugin_config):
    try:
        methodname = plugin_config['avgpos_method']
    except KeyError:
        raise ConfigItemMissing('avgpos_method')

    if methodname.lower() == 'cartesian':
        avgpos_method = self.avgpos_cartesian
    else:
        avgpos_method = extloader.get_object(methodname)

    log.info('loaded stringmethod avgpos method {!r}'.format(avgpos_method))

    return avgpos_method
def mapper_from_function(funcspec):
    '''Return a mapper constructed by calling a function in a named module.
    ``funcspec`` should be formatted as ``[PATH]:MODULE.FUNC``. This function
    loads MODULE, optionally adding PATH to the search path, then returns
    MODULE.FUNC().'''
    if ':' in funcspec:
        # Split on the last colon only, so PATH itself may contain colons
        (pathpart, funcpart) = funcspec.rsplit(':', 1)
        pathinfo = ['.'] + pathpart.split(':')
    else:
        funcpart = funcspec
        pathinfo = ['.']

    fn = get_object(funcpart, ['.'] + pathinfo)
    mapper = fn()
    log.debug('loaded {!r} from {!r}'.format(mapper, fn))
    return mapper
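
# Usage sketch for mapper_from_function (the path, module, and function names
# are hypothetical): load construct_mapper() from my_system.py, searching
# './adaptive' in addition to the current directory.
def _example_mapper_from_funcspec():
    return mapper_from_function('./adaptive:my_system.construct_mapper')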
def process_args(self, args):
    self.plotscale = args.plotscale
    self.input_h5 = h5py.File(args.input, 'r')
    self.plot_output_filename = args.plot_output
    self.hdf5_output_filename = args.hdf5_output
    self.plot_contour = args.plot_contour

    if args.title:
        self.plottitle = args.title

    if args.range:
        self.plotrange = self.parse_range(args.range)

    if args.firstdim:
        self.dimensions.append(self.parse_dimspec(args.firstdim))
    else:
        self.dimensions.append({'idim': 0, 'label': 'dimension 0'})

    if args.enerzero:
        lenerzero = args.enerzero.lower()
        if lenerzero not in ('min', 'max'):
            try:
                self.enerzero = float(args.enerzero)
            except ValueError:
                raise ValueError('invalid energy zero point {!r}'.format(args.enerzero))
        else:
            self.enerzero = lenerzero
    else:
        self.enerzero = 'min'

    self.avail_iter_start, self.avail_iter_stop = h5io.get_iter_range(self.input_h5['histograms'])
    try:
        self.avail_iter_step = h5io.get_iter_step(self.input_h5['histograms'])
    except KeyError:
        self.avail_iter_step = 1

    log.info(
        'HDF5 file {!r} contains data for iterations {} -- {} with a step of {}'.format(
            args.input, self.avail_iter_start, self.avail_iter_stop, self.avail_iter_step
        )
    )

    if args.postprocess_function:
        self.postprocess_function = get_object(args.postprocess_function, path=['.'])
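
# A sketch of a --postprocess-function as loaded above, assuming the plothist
# convention that it is called as postprocess(hist, midpoints, binbounds)
# after the plot is drawn, so it can annotate the current matplotlib axes.
def _example_postprocess(hist, midpoints, binbounds):
    from matplotlib import pyplot
    # Mark an illustrative region boundary on the energy plot
    pyplot.axvline(2.5, color='k', linestyle='--')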
def process_args(self, args):
    if args.construct_wdataset:
        self.dsspec = FnDSSpec(self.h5filename, get_object(args.construct_wdataset, path=['.']))
    elif args.dsspecs:
        self.dsspec = MultiDSSpec(
            [SingleSegmentDSSpec.from_string(dsspec, self.h5filename) for dsspec in args.dsspecs]
        )
    else:
        # we can only get here if a default dataset name was specified
        assert self.default_dsname
        # slice by 'weight' so that the default dataset yields segment weights
        self.dsspec = SingleIterDSSpec(self.h5filename, self.default_dsname, slice=np.index_exp['weight'])
def process_args(self, args):
    self.progress.process_args(args)
    self.data_reader.process_args(args)
    # Necessary to open the file to get the current iteration,
    # if we want to use the mapper in the file
    self.data_reader.open(mode='r+')
    self.n_iter = self.data_reader.current_iteration
    # If we decide to use this option for iteration selection:
    # getattr(args, 'bins_from_h5file', None) or self.data_reader.current_iteration
    with self.data_reader:
        self.dssynth.h5filename = self.data_reader.we_h5filename
        self.dssynth.process_args(args)
        if args.config_from_file is False:
            self.binning.set_we_h5file_info(self.n_iter, self.data_reader)
            self.binning.process_args(args)

    self.output_filename = args.output

    if args.config_from_file:
        if not args.scheme:
            raise ValueError('A scheme must be specified.')
        else:
            self.load_config_from_west(args.scheme)
    elif args.states:
        self.parse_cmdline_states(args.states)
    elif args.states_from_file:
        self.load_state_file(args.states_from_file)
    elif args.states_from_function:
        self.load_states_from_function(get_object(args.states_from_function, path=['.']))

    if self.states and len(self.states) < 2:
        raise ValueError('zero, two, or more macrostates are required')

    # self.output_file = WESTPAH5File(args.output, 'w', creating_program=True)
    log.debug('state list: {!r}'.format(self.states))

    self.subsample = args.subsample if args.subsample is not None else False
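
# A sketch of a --states-from-function callable as passed above. This assumes
# the w_assign convention that the function receives the bin mapper and
# returns one dict per macrostate with 'label' and 'coords' entries; the
# labels and coordinates here are illustrative.
def _example_states_from_function(mapper):
    return [
        {'label': 'bound', 'coords': np.array([[1.0]])},
        {'label': 'unbound', 'coords': np.array([[8.0]])},
    ]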
def __init__(self, rc=None):
    super().__init__(rc)

    # A mapping of environment variables to template strings which will be
    # added to the environment of all children launched.
    self.addtl_child_environ = dict()

    # A mapping of executable name ('propagator', 'pre_iteration', 'post_iteration') to
    # a dictionary of attributes like 'executable', 'stdout', 'stderr', 'environ', etc.
    self.exe_info = {}
    self.exe_info['propagator'] = {}
    self.exe_info['pre_iteration'] = {}
    self.exe_info['post_iteration'] = {}
    self.exe_info['get_pcoord'] = {}
    self.exe_info['gen_istate'] = {}

    # A mapping of data set name ('pcoord', 'coord', 'com', etc.) to a dictionary of
    # attributes like 'loader', 'dtype', etc.
    self.data_info = {}
    self.data_info['pcoord'] = {}

    # Validate configuration
    config = self.rc.config

    for key in [
        ('west', 'executable', 'propagator', 'executable'),
        ('west', 'data', 'data_refs', 'segment'),
        ('west', 'data', 'data_refs', 'basis_state'),
        ('west', 'data', 'data_refs', 'initial_state'),
    ]:
        config.require(key)

    self.segment_ref_template = config['west', 'data', 'data_refs', 'segment']
    self.basis_state_ref_template = config['west', 'data', 'data_refs', 'basis_state']
    self.initial_state_ref_template = config['west', 'data', 'data_refs', 'initial_state']

    # Load additional environment variables for all child processes
    self.addtl_child_environ.update({k: str(v) for k, v in (config['west', 'executable', 'environ'] or {}).items()})

    # Load configuration items relating to child processes
    for child_type in ('propagator', 'pre_iteration', 'post_iteration', 'get_pcoord', 'gen_istate'):
        child_info = config.get(['west', 'executable', child_type])
        if not child_info:
            continue

        info_prefix = ['west', 'executable', child_type]

        # require executable to be specified if anything is specified at all
        config.require(info_prefix + ['executable'])

        self.exe_info[child_type]['executable'] = child_info['executable']
        self.exe_info[child_type]['stdin'] = child_info.get('stdin', os.devnull)
        self.exe_info[child_type]['stdout'] = child_info.get('stdout', None)
        self.exe_info[child_type]['stderr'] = child_info.get('stderr', None)
        self.exe_info[child_type]['cwd'] = child_info.get('cwd', None)

        if child_type not in ('propagator', 'get_pcoord', 'gen_istate'):
            self.exe_info[child_type]['enabled'] = child_info.get('enabled', True)
        else:
            # for consistency, propagator, get_pcoord, and gen_istate can never be disabled
            self.exe_info[child_type]['enabled'] = True

        # apply environment modifications specific to this executable
        self.exe_info[child_type]['environ'] = {k: str(v) for k, v in (child_info.get('environ') or {}).items()}

    log.debug('exe_info: {!r}'.format(self.exe_info))

    # Load configuration items relating to dataset input
    self.data_info['pcoord'] = {'name': 'pcoord', 'loader': pcoord_loader, 'enabled': True, 'filename': None}
    dataset_configs = config.get(['west', 'executable', 'datasets']) or []
    for dsinfo in dataset_configs:
        try:
            dsname = dsinfo['name']
        except KeyError:
            raise ValueError('dataset specifications require a ``name`` field')

        if dsname != 'pcoord':
            check_bool(dsinfo.setdefault('enabled', True))
        else:
            # can never disable pcoord collection
            dsinfo['enabled'] = True

        loader_directive = dsinfo.get('loader')
        if loader_directive:
            loader = get_object(loader_directive)
        elif dsname != 'pcoord':
            loader = aux_data_loader
        else:
            # fall back to the default progress coordinate loader; without this
            # branch, ``loader`` would be unbound for a 'pcoord' entry that
            # specifies no loader
            loader = pcoord_loader
        dsinfo['loader'] = loader

        self.data_info.setdefault(dsname, {}).update(dsinfo)

    log.debug('data_info: {!r}'.format(self.data_info))
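
# A sketch of the west.cfg section consumed by the constructor above (the
# paths, templates, and loader module are illustrative, not from the
# original source):
#
#   west:
#     data:
#       data_refs:
#         segment: $WEST_SIM_ROOT/traj_segs/{segment.n_iter:06d}/{segment.seg_id:06d}
#         basis_state: $WEST_SIM_ROOT/bstates/{basis_state.auxref}
#         initial_state: $WEST_SIM_ROOT/istates/{initial_state.state_id}.rst
#     executable:
#       environ:
#         PROPAGATION_DEBUG: 1
#       propagator:
#         executable: $WEST_SIM_ROOT/westpa_scripts/runseg.sh
#         stdout: seg_logs/{segment.n_iter:06d}-{segment.seg_id:06d}.log
#       datasets:
#         - name: coord
#           loader: my_loaders.coord_loader
#           enabled: true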
def map_binless(coords, mask, output, *args, **kwargs):
    '''Adaptively group walkers according to a user-defined grouping function
    that is defined externally. This is a very general implementation, but it
    is currently limited to a two-dimensional progress coordinate.'''
    n_groups = kwargs.get("n_groups")
    n_dims = kwargs.get("n_dims")
    group_function = get_object(kwargs.get("group_function"))
    log.debug(f'binless arguments: {kwargs}')

    try:
        group_function_kwargs = kwargs.get('group_function_kwargs')['group_arguments']
    except (KeyError, TypeError):
        # 'group_function_kwargs' may be absent entirely (yielding None, hence
        # TypeError) or may lack the 'group_arguments' key (KeyError)
        group_function_kwargs = {}

    ndim = n_dims

    if not np.any(mask):
        return output

    allcoords = np.copy(coords)
    allmask = np.copy(mask)

    isfinal = None
    splitting = False

    # the segments should be sent in by the driver as half initial segments and half final segments
    # allcoords contains all segments
    # coords should contain ONLY final segments
    if coords.shape[1] > ndim:
        if coords.shape[1] > ndim + 1:
            isfinal = allcoords[:, ndim + 1].astype(np.bool_)
        else:
            isfinal = np.ones(coords.shape[0], dtype=np.bool_)
        coords = coords[isfinal, :ndim]
        mask = mask[isfinal]
        splitting = True

    # in case there are no final segments in range, but there are initial ones,
    # fall back to all segments
    if not np.any(mask):
        coords = allcoords[:, :ndim]
        mask = allmask
        splitting = False

    # filter the list of coordinates (which contains coordinates outside of the binless region)
    # to obtain only the ones we want to cluster
    # this is done with all dimensions at once
    binless_coords = coords[mask]
    nsegs_binless = len(binless_coords)

    # we need to make sure that the number of segments in the binless region is greater than
    # the number of clusters we request
    if nsegs_binless == 1:
        # if only one segment is in the binless region, assign it to a single cluster
        clusters = [0]
    elif nsegs_binless < n_groups:
        # if there is more than one segment in the binless region but still fewer than our
        # target number, adjust the target to the number of segments in the binless region
        clusters = group_function(binless_coords, nsegs_binless, splitting, **group_function_kwargs)
    else:
        # if there are enough segments in the binless region, proceed as planned
        clusters = group_function(binless_coords, n_groups, splitting, **group_function_kwargs)

    # this is a good place to say this... output is a list which matches the length of allcoords
    # allcoords is a collection of all initial and final segment coords for that iteration
    # we first filtered those to only contain the final data points, since those are the ones we care
    # about clustering
    # we then filtered to only have the coords in the binless region, since, again, those are what we care about
    # we then assigned each to a cluster, which is essentially a splitting index
    # all that's left is to find where each binless segment is in the output and insert the cluster index there
    for idx, val in enumerate(binless_coords):
        if ndim > 1:
            mask2 = np.logical_and(allcoords[:, 0] == val[0], allcoords[:, 1] == val[1])
        else:
            mask2 = allcoords[:, 0] == val[0]
        output[mask2] = clusters[idx]

    return output
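
# A sketch of a user-defined group function as resolved by get_object above.
# The call sites above fix the required signature: (coords, n_groups,
# splitting, **kwargs), returning one integer cluster label per walker. This
# minimal version assumes scikit-learn is installed and ignores the
# splitting flag.
def _example_group_function(coords, n_groups, splitting, **kwargs):
    from sklearn.cluster import KMeans
    return KMeans(n_clusters=n_groups).fit_predict(coords)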