def get_segments_by_id(self, n_iter, seg_ids, include_pcoords=True):
    '''Get segments from the data manager, employing caching where possible'''

    if len(seg_ids) == 0:
        return []

    seg_index = self.get_seg_index(n_iter)
    all_wtg_parent_ids = self.get_wtg_parent_array(n_iter)

    segments = []

    if include_pcoords:
        pcoords = self.get_pcoords(n_iter, seg_ids)

    for (isegid, seg_id) in enumerate(seg_ids):
        row = seg_index[seg_id]
        parents_offset = row['wtg_offset']
        n_parents = row['wtg_n_parents']
        segment = Segment(seg_id=seg_id,
                          n_iter=n_iter,
                          status=row['status'],
                          endpoint_type=row['endpoint_type'],
                          walltime=row['walltime'],
                          cputime=row['cputime'],
                          weight=row['weight'])
        if include_pcoords:
            segment.pcoord = pcoords[isegid]
        parent_ids = all_wtg_parent_ids[parents_offset:parents_offset + n_parents]
        segment.wtg_parent_ids = {int(parent_id) for parent_id in parent_ids}
        segment.parent_id = int(parent_ids[0])
        segments.append(segment)

    return segments
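# A minimal usage sketch for get_segments_by_id above. Illustrative only:
# `dm`, the iteration number, and the seg_ids are assumed, not taken from
# this code; only the method name and its return value (a list of Segment
# objects with weight, parent_id, and wtg_parent_ids set) come from above.
#
#   segs = dm.get_segments_by_id(n_iter=5, seg_ids=[0, 1, 2])
#   for seg in segs:
#       print(seg.seg_id, seg.weight, seg.parent_id, sorted(seg.wtg_parent_ids))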
def rebin_current(self, parent_segments):
    '''Reconstruct walkers for the current iteration based on (presumably) new binning.
    The previous iteration's segments must be provided (as ``parent_segments``) in order
    to update endpoint types appropriately.'''

    self._prep_we()
    self._parent_map = {segment.seg_id: segment for segment in parent_segments}

    # Create new segments for the next iteration
    # We assume that everything is going to continue without being touched by recycling
    # or WE, and adjust later
    new_pcoord_array = self.system.new_pcoord_array
    n_iter = None

    for ibin, _bin in enumerate(self.final_binning):
        for segment in _bin:
            if n_iter is None:
                n_iter = segment.n_iter
            else:
                assert segment.n_iter == n_iter

            new_segment = Segment(n_iter=segment.n_iter,
                                  parent_id=segment.parent_id,
                                  weight=segment.weight,
                                  wtg_parent_ids=set(segment.wtg_parent_ids or []),
                                  pcoord=new_pcoord_array(),
                                  status=Segment.SEG_STATUS_PREPARED)
            new_segment.pcoord[0] = segment.pcoord[0]
            self.next_iter_binning[ibin].add(new_segment)

    self._run_we()
def segment(self, init_pcoord, final_pcoord, weight=1.0):
    segment = Segment(n_iter=1,
                      seg_id=self._seg_id,
                      pcoord=self.system.new_pcoord_array(),
                      weight=weight)
    segment.pcoord[0] = init_pcoord
    segment.pcoord[1] = final_pcoord
    self._seg_id += 1
    return segment
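# Usage as exercised by the tests below: build a one-iteration, two-point
# segment whose progress coordinate runs from 1.5 to 0.5 (arguments are the
# same illustrative values used in test_split_with_adjust_istates).
#
#   seg = self.segment(1.5, 0.5, weight=0.125)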
def test_merge_by_weight(self):
    selected_counts = {0: 0, 1: 0}
    alpha = 0.01
    nrounds = 1000

    from scipy.stats import binom

    # lower and upper bounds of the central (1 - alpha) CI for selecting the
    # segment with weight 1/3
    lb = binom.ppf(alpha / 2.0, nrounds, 1.0 / 3.0)
    ub = binom.ppf(1.0 - alpha / 2.0, nrounds, 1.0 / 3.0)

    system = WESTSystem()
    system.bin_mapper = RectilinearBinMapper([[0.0, 1.0]])
    system.bin_target_counts = numpy.array([1])
    system.pcoord_len = 2
    self.we_driver = WEDriver(system=system)
    self.system = system
    self._seg_id = 0

    segments = [Segment(n_iter=1, seg_id=0,
                        pcoord=numpy.array([[0], [0.25]], dtype=numpy.float32),
                        weight=1.0 / 3.0),
                Segment(n_iter=1, seg_id=1,
                        pcoord=numpy.array([[0], [0.75]], dtype=numpy.float32),
                        weight=2.0 / 3.0)]

    for _iround in range(nrounds):
        for segment in segments:
            segment.endpoint_type = Segment.SEG_ENDPOINT_UNSET

        self.we_driver.new_iteration()
        self.we_driver.assign(segments)
        self.we_driver.construct_next()

        assert len(self.we_driver.next_iter_binning[0]) == 1
        newseg = self.we_driver.next_iter_binning[0].pop()
        assert segments[newseg.parent_id].endpoint_type == Segment.SEG_ENDPOINT_CONTINUES
        assert segments[~newseg.parent_id].endpoint_type == Segment.SEG_ENDPOINT_MERGED

        selected_counts[newseg.parent_id] += 1

    print(selected_counts)
    assert lb <= selected_counts[0] <= ub, \
        ('Incorrect proportion of histories selected; this is expected about '
         '{:%} of the time; retry the test.'.format(alpha))
def test_split_with_adjust_istates(self):
    # A split followed by a merge, for segments which are initial states
    self.system.bin_target_counts = numpy.array([5, 5])
    segments = [self.segment(1.5, 0.5, weight=0.125),
                self.segment(1.5, 0.5, weight=0.125),
                self.segment(0.0, 1.5, weight=0.375),
                self.segment(0.0, 1.5, weight=0.375)]
    self.we_driver.new_iteration()
    self.we_driver._prep_we()
    self.we_driver.used_initial_states[-1] = None
    self.we_driver.used_initial_states[-2] = None

    for ibin, _bin in enumerate(self.we_driver.next_iter_binning):
        pc = numpy.array([[0.5 + ibin], [0.0]])
        for _iseg in range(6):
            segment = Segment(n_iter=1, seg_id=None, weight=1.0 / 12.0,
                              parent_id=-(ibin + 1), pcoord=pc)
            _bin.add(segment)

    for ibin in range(len(self.we_driver.next_iter_binning)):
        # This will raise KeyError if initial state tracking is done improperly
        self.we_driver._adjust_count(ibin)

    assert len(self.we_driver.next_iter_binning[0]) == 5
    assert len(self.we_driver.next_iter_binning[1]) == 5
def construct_next(self):
    '''Construct walkers for the next iteration, by running weighted ensemble recycling
    and bin/split/merge on the segments previously assigned to bins using ``assign``.
    Enough unused initial states must be present in ``self.avail_initial_states`` for
    every recycled walker to be assigned an initial state.

    After this function completes, ``self.flux_matrix`` contains a valid flux matrix for
    this iteration (including any contributions from recycling from the previous
    iteration), and ``self.next_iter_segments`` contains a list of segments ready for the
    next iteration, with appropriate values set for weight, endpoint type, parent
    walkers, and so on.
    '''

    self._prep_we()

    # Create new segments for the next iteration
    # We assume that everything is going to continue without being touched by recycling
    # or WE, and adjust later
    new_pcoord_array = self.system.new_pcoord_array
    n_iter = None

    for ibin, _bin in enumerate(self.final_binning):
        for segment in _bin:
            if n_iter is None:
                n_iter = segment.n_iter
            else:
                assert segment.n_iter == n_iter

            segment.endpoint_type = Segment.SEG_ENDPOINT_CONTINUES
            new_segment = Segment(n_iter=segment.n_iter + 1,
                                  parent_id=segment.seg_id,
                                  weight=segment.weight,
                                  wtg_parent_ids=[segment.seg_id],
                                  pcoord=new_pcoord_array(),
                                  status=Segment.SEG_STATUS_PREPARED)
            new_segment.pcoord[0] = segment.pcoord[-1]
            self.next_iter_binning[ibin].add(new_segment)

            # Store a link to the parent segment, so we can update its endpoint status
            # as we need, based on its ID
            self._parent_map[segment.seg_id] = segment

    self._run_we()

    log.debug('used initial states: {!r}'.format(self.used_initial_states))
    log.debug('available initial states: {!r}'.format(self.avail_initial_states))
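# A condensed sketch of the call sequence that drives construct_next, as
# exercised by test_merge_by_weight above; the driver and segment setup are
# assumed to exist already, and `we_driver` is illustrative:
#
#   we_driver.new_iteration()        # reset per-iteration state
#   we_driver.assign(segments)       # bin the completed segments
#   we_driver.construct_next()       # recycle, then split/merge per bin
#   new_segs = list(we_driver.next_iter_segments)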
def _split_walker(self, segment, m, bin):
    '''Split the walker ``segment`` (in ``bin``) into ``m`` walkers'''

    bin.remove(segment)

    new_segments = []
    for _inew in range(m):
        new_segment = Segment(n_iter=segment.n_iter,  # previously incremented
                              weight=segment.weight / m,
                              parent_id=segment.parent_id,
                              wtg_parent_ids=set(segment.wtg_parent_ids),
                              pcoord=segment.pcoord.copy(),
                              status=Segment.SEG_STATUS_PREPARED)
        new_segment.pcoord[0, :] = segment.pcoord[0, :]
        new_segments.append(new_segment)

    bin.update(new_segments)

    if log.isEnabledFor(logging.DEBUG):
        log.debug('splitting {!r} into {:d}:\n    {!r}'.format(segment, m, new_segments))

    return new_segments
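# A self-contained check of the invariant _split_walker maintains: each of
# the m children carries weight/m, so total weight is conserved. The numbers
# here are illustrative; no WESTPA objects are needed.
def _demo_split_weights(parent_weight=0.6, m=3):
    child_weights = [parent_weight / m for _ in range(m)]
    assert abs(sum(child_weights) - parent_weight) < 1e-12
    return child_weights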
def update_args_env_segment(self, template_args, environ, segment):
    template_args['segment'] = segment

    environ[self.ENV_CURRENT_SEG_INITPOINT] = Segment.initpoint_type_names[segment.initpoint_type]

    if segment.initpoint_type == Segment.SEG_INITPOINT_CONTINUES:
        # Could use the actual parent object here if the work manager cared to pass that
        # much data to us (we'd need at least the subset of parents for all segments sent
        # in the call to propagate); that may make a good west.cfg option for future crazy
        # extensibility, but for now, just populate the bare minimum
        parent = Segment(n_iter=segment.n_iter - 1, seg_id=segment.parent_id)
        parent_template_args = dict(template_args)
        parent_template_args['segment'] = parent

        environ[self.ENV_PARENT_SEG_ID] = str(segment.parent_id if segment.parent_id is not None else -1)
        environ[self.ENV_PARENT_DATA_REF] = self.makepath(self.segment_ref_template, parent_template_args)
    elif segment.initpoint_type == Segment.SEG_INITPOINT_NEWTRAJ:
        # This segment is initiated from a basis state; WEST_PARENT_SEG_ID and
        # WEST_PARENT_DATA_REF are set to the basis state ID and data ref
        initial_state = self.initial_states[segment.initial_state_id]
        basis_state = self.basis_states[initial_state.basis_state_id]

        if self.ENV_BSTATE_ID not in environ:
            self.update_args_env_basis_state(template_args, environ, basis_state)
        if self.ENV_ISTATE_ID not in environ:
            self.update_args_env_initial_state(template_args, environ, initial_state)

        assert initial_state.istate_type in (InitialState.ISTATE_TYPE_BASIS,
                                             InitialState.ISTATE_TYPE_GENERATED)
        if initial_state.istate_type == InitialState.ISTATE_TYPE_BASIS:
            environ[self.ENV_PARENT_DATA_REF] = environ[self.ENV_BSTATE_DATA_REF]
        else:  # initial_state.istate_type == InitialState.ISTATE_TYPE_GENERATED
            environ[self.ENV_PARENT_DATA_REF] = environ[self.ENV_ISTATE_DATA_REF]

    environ[self.ENV_CURRENT_SEG_ID] = str(segment.seg_id if segment.seg_id is not None else -1)
    environ[self.ENV_CURRENT_SEG_DATA_REF] = self.makepath(self.segment_ref_template, template_args)

    return template_args, environ
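# An illustrative sketch of what a continuing segment ends up with after
# update_args_env_segment. Attribute names come from the code above; the
# concrete environment-variable names and paths depend on the class
# constants and templates, so the values shown are hypothetical:
#
#   environ[self.ENV_CURRENT_SEG_INITPOINT]  -> name of the initpoint type
#   environ[self.ENV_PARENT_SEG_ID]          -> e.g. '42', or '-1' if unset
#   environ[self.ENV_PARENT_DATA_REF]        -> path built from segment_ref_template
#   environ[self.ENV_CURRENT_SEG_ID]         -> e.g. '17', or '-1' if unset
#   environ[self.ENV_CURRENT_SEG_DATA_REF]   -> path built from segment_ref_template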
state_map_dtype = numpy.dtype([('old_n_iter', n_iter_dtype),
                               ('old_seg_id', seg_id_dtype),
                               ('new_istate_id', seg_id_dtype)])
state_map = numpy.empty((n_segments,), dtype=state_map_dtype)
state_map['old_n_iter'] = n_iter

for (iseg, (index_row, pcoord)) in enumerate(zip(old_index, old_final_pcoords)):
    istate = istates[iseg]
    istate.iter_created = 0
    istate.iter_used = 1
    istate.istate_type = InitialState.ISTATE_TYPE_RESTART
    istate.istate_status = InitialState.ISTATE_STATUS_PREPARED
    istate.pcoord = pcoord

    segment = Segment(n_iter=1,
                      seg_id=iseg,
                      weight=index_row['weight'],
                      parent_id=-(istate.state_id + 1),
                      wtg_parent_ids=[-(istate.state_id + 1)],
                      status=Segment.SEG_STATUS_PREPARED)
    segment.pcoord = numpy.zeros((pcoord_len, pcoord_ndim), dtype=pcoord.dtype)
    segment.pcoord[0] = pcoord
    segments.append(segment)

    state_map[iseg]['old_seg_id'] = iseg
    state_map[iseg]['new_istate_id'] = istate.state_id

dm_new.update_initial_states(istates, n_iter=0)
dm_new.prepare_iteration(n_iter=1, segments=segments)

# Update current iteration and close both files
dm_new.current_iteration = 1
dm_new.close_backing()
dm_old.close_backing()
def populate_initial(self, initial_states, weights, system=None):
    '''Create walkers for a new weighted ensemble simulation.

    One segment is created for each provided initial state, then binned and split/merged
    as necessary. After this function is called, next_iter_segments will yield the new
    segments to create, used_initial_states will contain data about which of the provided
    initial states were used, and avail_initial_states will contain data about which
    initial states were unused (because their corresponding walkers were merged out of
    existence).
    '''

    # This has to be down here to avoid an import race
    from westpa.core.data_manager import weight_dtype
    EPS = numpy.finfo(weight_dtype).eps

    system = system or westpa.core.rc.get_system_driver()
    self.new_iteration(initial_states=[], target_states=[],
                       bin_mapper=system.bin_mapper,
                       bin_target_counts=system.bin_target_counts)

    # Create dummy segments
    segments = []
    for (seg_id, (initial_state, weight)) in enumerate(zip(initial_states, weights)):
        dummy_segment = Segment(n_iter=0,
                                seg_id=seg_id,
                                parent_id=-(initial_state.state_id + 1),
                                weight=weight,
                                wtg_parent_ids=set([-(initial_state.state_id + 1)]),
                                pcoord=system.new_pcoord_array(),
                                status=Segment.SEG_STATUS_PREPARED)
        dummy_segment.pcoord[[0, -1]] = initial_state.pcoord
        segments.append(dummy_segment)

    # Adjust weights, if necessary
    tprob = sum(weights)
    if abs(1.0 - tprob) > len(weights) * EPS:
        pscale = 1.0 / tprob
        log.warning('Weights of initial segments do not sum to unity; scaling by {:g}'.format(pscale))
        for segment in segments:
            segment.weight *= pscale

    self.assign(segments, initializing=True)
    self.construct_next()

    # We now have properly-constructed initial segments, except for parent information,
    # and we need to mark initial states as used or unused
    istates_by_id = {state.state_id: state for state in initial_states}
    dummysegs_by_id = self._parent_map
    self.avail_initial_states = dict(istates_by_id)
    self.used_initial_states = {}
    for segment in self.next_iter_segments:
        segment.parent_id = dummysegs_by_id[segment.parent_id].parent_id
        segment.wtg_parent_ids = set([segment.parent_id])
        assert segment.initpoint_type == Segment.SEG_INITPOINT_NEWTRAJ
        istate = istates_by_id[segment.initial_state_id]
        try:
            self.used_initial_states[istate.state_id] = self.avail_initial_states.pop(istate.state_id)
        except KeyError:
            # Shared by more than one segment, and already marked as used
            pass

    for used_istate in self.used_initial_states.values():
        used_istate.iter_used = 1
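# A self-contained sketch of the weight rescaling performed in
# populate_initial above: if the weights do not sum to unity beyond a
# len(weights)*eps tolerance, each weight is scaled by 1/sum. float64 stands
# in for weight_dtype here; the input weights are illustrative.
import numpy

def _demo_rescale_weights(weights=(0.2, 0.2, 0.2)):
    eps = numpy.finfo(numpy.float64).eps
    tprob = sum(weights)
    if abs(1.0 - tprob) > len(weights) * eps:
        weights = [w / tprob for w in weights]
    assert abs(sum(weights) - 1.0) <= len(weights) * eps
    return weights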
def _merge_walkers(self, segments, cumul_weight, bin):
    '''Merge the given ``segments`` in ``bin``, previously sorted by weight, into one
    conglomerate segment. ``cumul_weight`` is the cumulative sum of the weights of the
    ``segments``; this may be None, in which case it is calculated here.'''

    if cumul_weight is None:
        cumul_weight = numpy.add.accumulate([segment.weight for segment in segments])

    glom = Segment(n_iter=segments[0].n_iter,  # assumed correct (and equal among all segments)
                   weight=cumul_weight[len(segments) - 1],
                   status=Segment.SEG_STATUS_PREPARED,
                   pcoord=self.system.new_pcoord_array())

    # Select the history to use
    # The following takes a random number in the interval 0 <= x < glom.weight, then
    # sees where this value falls among the (sorted) weights of the segments being
    # merged; this ensures that a walker with (e.g.) twice the weight of its brethren
    # has twice the probability of having its history selected for continuation
    iparent = numpy.digitize((random.uniform(0, glom.weight),), cumul_weight)[0]
    gparent_seg = segments[iparent]

    # Inherit history from this segment ("gparent" stands for "glom parent", as opposed
    # to historical parent)
    glom.parent_id = gparent_seg.parent_id
    glom.pcoord[0, :] = gparent_seg.pcoord[0, :]

    # Weight comes from all segments being merged, and therefore all their parent segments
    glom.wtg_parent_ids = set()
    for segment in segments:
        glom.wtg_parent_ids |= segment.wtg_parent_ids

    # Remove merged walkers from consideration before treating initial states
    bin.difference_update(segments)

    # The historical parent of gparent is continued; all others are marked as merged
    for segment in segments:
        if segment is gparent_seg:
            # we must ignore initial states here...
            if segment.parent_id >= 0:
                self._parent_map[segment.parent_id].endpoint_type = Segment.SEG_ENDPOINT_CONTINUES
        else:
            # ...and "unuse" an initial state here (recall that initial states are in 1:1
            # correspondence with the segments they initiate), except when a
            # previously-split particle is being merged
            if segment.parent_id >= 0:
                self._parent_map[segment.parent_id].endpoint_type = Segment.SEG_ENDPOINT_MERGED
            else:
                if segment.initial_state_id in {seg.initial_state_id for seg in bin}:
                    log.debug('initial state in use by other walker; not removing')
                else:
                    initial_state = self.used_initial_states.pop(segment.initial_state_id)
                    log.debug('freeing initial state {!r} for future use (merged)'.format(initial_state))
                    self.avail_initial_states[initial_state.state_id] = initial_state
                    initial_state.iter_used = None

    if log.isEnabledFor(logging.DEBUG):
        log.debug('merging ({:d}) {!r} into 1:\n    {!r}'.format(len(segments), segments, glom))

    bin.add(glom)
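# A self-contained sketch of the history-selection rule used in
# _merge_walkers above: draw x uniformly from [0, total weight) and locate it
# among the cumulative weights with numpy.digitize, so a walker with twice
# the weight is twice as likely to donate its history. The weights here are
# illustrative.
import random
import numpy

def _demo_select_history(weights=(1.0 / 3.0, 2.0 / 3.0)):
    cumul_weight = numpy.add.accumulate(weights)
    x = random.uniform(0, cumul_weight[-1])
    iparent = numpy.digitize((x,), cumul_weight)[0]
    return iparent  # == 1 about two thirds of the time for these weights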
state_map = numpy.empty((n_segments,), dtype=state_map_dtype)
state_map['old_n_iter'] = n_iter

for (iseg, (index_row, pcoord)) in enumerate(zip(old_index, old_final_pcoords)):
    istate = istates[iseg]
    istate.iter_created = 0
    istate.iter_used = 1
    #istate.istate_type = InitialState.ISTATE_TYPE_RESTART
    istate.istate_type = InitialState.ISTATE_TYPE_BASIS
    istate.istate_status = InitialState.ISTATE_STATUS_PREPARED
    istate.pcoord = pcoord

    segment = Segment(n_iter=1,
                      seg_id=iseg,
                      weight=index_row['weight'],
                      #parent_id=-(istate.state_id + 1),
                      parent_id=istate.state_id,
                      #wtg_parent_ids=[-(istate.state_id + 1)],
                      wtg_parent_ids=[istate.state_id],
                      status=Segment.SEG_STATUS_PREPARED)
    segment.pcoord = numpy.zeros((pcoord_len, pcoord_ndim), dtype=pcoord.dtype)
    segment.pcoord[0] = pcoord
    segments.append(segment)

    state_map[iseg]['old_seg_id'] = iseg
    state_map[iseg]['new_istate_id'] = istate.state_id

dm_new.update_initial_states(istates, n_iter=0)
dm_new.prepare_iteration(n_iter=1, segments=segments)

# Update current iteration and close both files
dm_new.current_iteration = 1
dm_new.close_backing()
dm_old.close_backing()
                          iter_used=1,
                          istate_type=InitialState.ISTATE_TYPE_GENERATED,
                          istate_status=InitialState.ISTATE_STATUS_PREPARED,
                          pcoord=all_pcoord[struct_id])
    initial_states[struct_id] = istate

    istate_filename = 'istates/struct_{:06d}.gro'.format(istate_id)
    uv.selectAtoms('all').positions = coord_ds[struct_id]
    uv.selectAtoms('all').write(istate_filename)
    print(' wrote {} containing initial state {} from structure {} with weight {}'
          .format(istate_filename, istate_id, struct_id, weight))
    istate_id += 1

    segment = Segment(seg_id=seg_id,
                      n_iter=1,
                      weight=weight,
                      pcoord=system.new_pcoord_array(),
                      status=Segment.SEG_STATUS_PREPARED)
    segment.parent_id = -(istate.state_id + 1)
    segment.wtg_parent_ids = [segment.parent_id]
    segment.pcoord[0] = istate.pcoord[:]
    segments.append(segment)
    seg_id += 1

    sys.stdout.flush()

data_manager.save_target_states([], n_iter=1)
data_manager.create_initial_states(len(initial_states), 1)
data_manager.update_initial_states(initial_states.values(), n_iter=1)
data_manager.prepare_iteration(1, segments)
data_manager.flush_backing()
data_manager.close_backing()
def from_data_manager(cls, n_iter, seg_id, data_manager=None):
    '''Construct and return a trajectory trace whose last segment is identified by
    ``seg_id`` in the iteration number ``n_iter``.'''

    data_manager = data_manager or westpa.rc.get_data_manager()

    # These values are used later on
    endpoint_type = None
    pcoord_dtype = None
    pcoord_pt_shape = None

    seginfo = []
    parent_id = seg_id

    while n_iter > 0 and parent_id >= 0:
        seg_id = parent_id
        iter_group = data_manager.get_iter_group(n_iter)
        pcoord_ds = iter_group['pcoord']
        seg_index = iter_group['seg_index']
        n_segs = pcoord_ds.shape[0]
        pcoord_len = pcoord_ds.shape[1]

        assert seg_id < n_segs

        indexrow = seg_index[seg_id]
        final_pcoord = pcoord_ds[seg_id, pcoord_len - 1]
        weight = indexrow['weight']
        cputime = indexrow['cputime']
        walltime = indexrow['walltime']

        try:
            parent_id = int(indexrow['parent_id'])
        except IndexError:
            # old HDF5 version
            parent_id = int(iter_group['parents'][indexrow['parents_offset']])

        if endpoint_type is None:
            endpoint_type = indexrow['endpoint_type']
            pcoord_pt_shape = pcoord_ds.shape[2:]
            pcoord_dtype = pcoord_ds.dtype

        seginfo.append((n_iter, seg_id, weight, walltime, cputime, final_pcoord))

        del iter_group, pcoord_ds, seg_index
        n_iter -= 1

    # The loop terminates with parent_id set to the identifier of the initial state,
    # seg_id set to the identifier of the first segment in the trajectory, and n_iter
    # set to one less than the iteration of the first segment
    first_iter = n_iter + 1
    first_seg_id = seg_id
    first_parent_id = parent_id

    # Initial segment (for fetching the initial state)
    first_segment = Segment(n_iter=first_iter, seg_id=first_seg_id, parent_id=first_parent_id)

    seginfo.reverse()

    summary_dtype = numpy.dtype([('n_iter', n_iter_dtype),
                                 ('seg_id', seg_id_dtype),
                                 ('weight', weight_dtype),
                                 ('walltime', utime_dtype),
                                 ('cputime', utime_dtype),
                                 ('final_pcoord', pcoord_dtype, pcoord_pt_shape)])

    summary = numpy.array(seginfo, dtype=summary_dtype)

    try:
        initial_state = data_manager.get_segment_initial_states([first_segment], first_iter)[0]
    except KeyError:
        # old HDF5 version
        assert parent_id < 0
        istate_pcoord = data_manager.get_iter_group(first_iter)['pcoord'][first_seg_id, 0]
        istate_id = -(first_parent_id + 1)
        basis_state = None
        initial_state = InitialState(istate_id, None, iter_created=0, pcoord=istate_pcoord)
    else:
        basis_state = data_manager.get_basis_states(first_iter)[initial_state.basis_state_id]

    return cls(summary, endpoint_type, basis_state, initial_state, data_manager)
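# A self-contained sketch of the walk performed by from_data_manager above:
# follow parent_id backwards until it goes negative (negative IDs denote
# initial states in this codebase). The toy dictionaries stand in for the
# per-iteration HDF5 seg_index tables; all values are illustrative.
def _demo_trace_back():
    seg_index_by_iter = {
        3: {5: {'parent_id': 2}},
        2: {2: {'parent_id': 0}},
        1: {0: {'parent_id': -1}},  # -1 encodes initial state 0
    }
    n_iter, parent_id = 3, 5
    trace = []
    while n_iter > 0 and parent_id >= 0:
        seg_id = parent_id
        trace.append((n_iter, seg_id))
        parent_id = seg_index_by_iter[n_iter][seg_id]['parent_id']
        n_iter -= 1
    return trace  # [(3, 5), (2, 2), (1, 0)], ending with parent_id == -1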