def _update_rvecs(self, rvecs):
    """Install a new unit cell and verify that it suits the RDF analysis.

    **Arguments:**

    rvecs
        The cell vectors. They are passed on unchanged to the ``Cell``
        constructor.

    A ``ValueError`` is raised when the cell does not have three periodic
    dimensions, or when ``2*rcut`` is larger than any of the cell spacings
    multiplied by ``(1 + 2*nimage)``. Note that ``self.cell`` is replaced
    before these checks are carried out.
    """
    cell = Cell(rvecs)
    self.cell = cell
    if cell.nvec != 3:
        raise ValueError('RDF can only be computed for 3D periodic systems.')
    # The largest sphere diameter that is allowed along each cell direction,
    # taking into account the additional periodic images.
    max_diameter = cell.rspacings * (1 + 2 * self.nimage)
    if (2 * self.rcut > max_diameter).any():
        raise ValueError(
            'The 2*rcut argument should not exceed any of the cell spacings.')
def _generate_ffs(self, nguests):
    """Extend ``self._ffs`` until it contains ``nguests`` force fields.

    **Arguments:**

    nguests
        The requested length of the list ``self._ffs``. Nothing happens when
        the list is already at least this long.

    Each new force field is made by calling
    ``self.ff_generator(system, self.guest)``, where ``system`` depends on
    the number of force fields that is already present:

    * 0: an empty system that only has the cell of the guest (no guests),
    * 1: the guest system itself (a single guest),
    * otherwise: the system of the last force field merged with one more
      guest.
    """
    for _ in range(len(self._ffs), nguests):
        nready = len(self._ffs)
        if nready == 0:
            # The very first force field, no guests
            system = System.create_empty()
            system.cell = Cell(self.guest.cell.rvecs)
        elif nready == 1:
            # The first real force field, a single guest
            system = self.guest
        else:
            # Start from the system of the most recent force field and add
            # one additional guest
            previous = self._ffs[-1].system
            system = previous.merge(self.guest)
        new_ff = self.ff_generator(system, self.guest)
        self._ffs.append(new_ff)
class RDF(AnalysisHook):
    def __init__(self, rcut, rspacing, f=None, start=0, end=-1,
                 max_sample=None, step=None, select0=None, select1=None,
                 pairs_sr=None, nimage=0, pospath='trajectory/pos',
                 poskey='pos', cellpath=None, cellkey=None, outpath=None):
        """Computes a radial distribution function (RDF)

           **Argument:**

           rcut
                The cutoff for the RDF analysis. This should be lower than the
                spacing between the primitive cell planes, multiplied by
                (1+2*nimage).

           rspacing
                The width of the bins to build up the RDF.

           **Optional arguments:**

           f
                An h5.File instance containing the trajectory data. If ``f``
                is not given, or it does not contain the dataset referred to
                with the ``pospath`` argument, an on-line analysis is carried
                out.

           start, end, max_sample, step
                arguments to setup the selection of time slices. See
                ``get_slice`` for more information.

           select0
                A list of atom indexes that are considered for the computation
                of the rdf. If not given, all atoms are used.

           select1
                A list of atom indexes that are needed to compute an RDF
                between two disjoint sets of atoms. (If there is some overlap
                between select0 and select1, an error will be raised.) If this
                is None, an 'internal' RDF will be computed for the atoms
                specified in select0.

           pairs_sr
                An array with short-range pairs of atoms (shape K x 2). When
                given, an additional RDF is generated for the short-range
                pairs (rdf_sr).

           nimage
                The number of cell images to consider in the computation of
                the pair distances. By default, this is zero, meaning that
                only the minimum image convention is used.

           pospath
                The path of the dataset that contains the time dependent data
                in the HDF5 file. The first axis of the array must be the time
                axis. This is only needed for an off-line analysis

           poskey
                In case of an on-line analysis, this is the key of the state
                item that contains the data from which the RDF is derived.

           cellpath
                The path of the time-dependent cell vector data. This is only
                needed when the cell parameters are variable and the analysis
                is off-line.

           cellkey
                The key of the stateitem that contains the cell vectors. This
                is only needed when the cell parameters are variable and the
                analysis is done on-line.

           outpath
                The output path for the RDF analysis in the HDF5 file. If not
                given, it defaults to '%s_rdf' % pospath. If this path already
                exists, it will be removed first.

           When f is None, or when the path does not exist in the HDF5 file,
           the class can be used as an on-line analysis hook for the iterative
           algorithms in yaff.sampling package. This means that the RDF is
           built up as the iterative algorithm progresses. The end option is
           ignored and max_sample is not applicable to an on-line analysis.

           A ``ValueError`` is raised when select0 or select1 is empty or
           contains duplicates, when both overlap, or when select1 is given
           without select0.
        """
        if select0 is not None:
            if len(select0) != len(set(select0)):
                raise ValueError('No duplicates are allowed in select0')
            if len(select0) == 0:
                raise ValueError('select0 can not be an empty list')
        if select1 is not None:
            if len(select1) != len(set(select1)):
                raise ValueError('No duplicates are allowed in select1')
            if len(select1) == 0:
                raise ValueError('select1 can not be an empty list')
        if select0 is not None and select1 is not None and \
           len(select0) + len(select1) != len(set(select0) | set(select1)):
            raise ValueError('No overlap is allowed between select0 and select1. If you want to compute and RDF within a set of atoms, omit the select1 argument.')
        if select0 is None and select1 is not None:
            raise ValueError('select1 can not be given without select0.')
        self.rcut = rcut
        self.rspacing = rspacing
        self.select0 = select0
        self.select1 = select1
        # select0 and select1 must be assigned before this call
        self.pairs_sr = self._process_pairs_sr(pairs_sr)
        self.nimage = nimage
        self.nbin = int(self.rcut / self.rspacing)
        # bin edges and bin centers
        self.bins = np.arange(self.nbin + 1) * self.rspacing
        self.d = self.bins[:-1] + 0.5 * self.rspacing
        # accumulators, summed over all processed time steps
        self.rdf_sum = np.zeros(self.nbin, float)
        self.CN_sum = np.zeros(self.nbin, float)
        if self.pairs_sr is not None:
            self.rdf_sum_sr = np.zeros(self.nbin, float)
        self.nsample = 0
        if outpath is None:
            outpath = pospath + '_rdf'
        analysis_inputs = {
            'pos': AnalysisInput(pospath, poskey),
            'cell': AnalysisInput(cellpath, cellkey, False),
        }
        AnalysisHook.__init__(self, f, start, end, max_sample, step,
                              analysis_inputs, outpath, False)

    def _process_pairs_sr(self, pairs_sr):
        '''Process the short-range pairs

           The following modifications are made to the list of short-range
           pairs:

           - The atom indexes are translated into indexes in select0 (and
             select1). When select0 is None, all atoms are used and the atom
             indexes are taken as such.
           - The pairs that do not fit in select0 (and select1) are left out.
           - For an internal RDF (select1 is None), the largest index is put
             first in each pair.
           - The list is properly sorted.

           Note that the argument pairs_sr provided to the constructor is not
           modified in-place. It is therefore safe to reuse it for another RDF
           analysis.

           **Returns:** an array with the processed pairs, or None when
           pairs_sr is None or when no pairs are left.
        '''
        if pairs_sr is None:
            return None
        if self.select0 is None:
            # All atoms are used (the constructor guarantees that select1 is
            # None too), so no translation of atom indexes is needed.
            index0 = None
            index1 = None
        else:
            index0 = dict((atom0, i0) for i0, atom0 in enumerate(self.select0))
            if self.select1 is None:
                index1 = index0
            else:
                index1 = dict((atom1, i1) for i1, atom1 in enumerate(self.select1))
        my_pairs_sr = []
        for atom0, atom1 in pairs_sr:
            if index0 is None:
                i0, i1 = atom0, atom1
            else:
                i0 = index0.get(atom0)
                i1 = index1.get(atom1)
                if i0 is None or i1 is None:
                    # Try again with the atoms in the opposite order
                    i0 = index0.get(atom1)
                    i1 = index1.get(atom0)
                if i0 is None or i1 is None:
                    continue
            if self.select1 is None and i0 < i1:
                i0, i1 = i1, i0
            my_pairs_sr.append((i0, i1))
        if len(my_pairs_sr) > 0:
            my_pairs_sr.sort()
            return np.array(my_pairs_sr)
        return None

    def _update_rvecs(self, rvecs):
        '''Replace self.cell by a cell with the given vectors and check it

           A ValueError is raised when the cell is not 3D periodic or when
           2*rcut exceeds any cell spacing multiplied by (1+2*nimage).
        '''
        self.cell = Cell(rvecs)
        if self.cell.nvec != 3:
            raise ValueError('RDF can only be computed for 3D periodic systems.')
        if (2 * self.rcut > self.cell.rspacings * (1 + 2 * self.nimage)).any():
            raise ValueError('The 2*rcut argument should not exceed any of the cell spacings.')

    def configure_online(self, iterative, st_pos, st_cell=None):
        '''Take the number of atoms and the cell from the system of iterative'''
        self.natom = iterative.ff.system.natom
        self._update_rvecs(iterative.ff.system.cell.rvecs)

    def configure_offline(self, ds_pos, ds_cell=None):
        '''Take the number of atoms and (for a fixed cell) the cell from self.f'''
        if ds_cell is None:
            # In this case, we have a unit cell that does not change shape.
            # It must be configured just once.
            if 'rvecs' in self.f['system']:
                self._update_rvecs(self.f['system/rvecs'][:])
            else:
                self._update_rvecs(None)
        # get the total number of atoms
        self.natom = self.f['system/numbers'].shape[0]

    def init_first(self):
        '''Setup some work arrays'''
        # determine the number of atoms
        if self.select0 is None:
            self.natom0 = self.natom
        else:
            self.natom0 = len(self.select0)
        self.pos0 = np.zeros((self.natom0, 3), float)
        # the number of pairs
        if self.select1 is None:
            self.npair = (self.natom0 * (self.natom0 - 1)) // 2
            self.pos1 = None
        else:
            self.natom1 = len(self.select1)
            self.pos1 = np.zeros((self.natom1, 3), float)
            self.npair = self.natom0 * self.natom1
        # multiply the number of pairs by all images
        self.npair *= (1 + 2 * self.nimage)**3
        # Prepare the output
        self.work = np.zeros(self.npair, float)
        AnalysisHook.init_first(self)
        if self.outg is not None:
            self.outg.create_dataset('rdf', (self.nbin,), float)
            self.outg.create_dataset('CN', (self.nbin,), float)
            self.outg['d'] = self.d
            if self.pairs_sr is not None:
                self.outg.create_dataset('rdf_sr', (self.nbin,), float)

    def read_online(self, st_pos, st_cell=None):
        '''Copy the selected positions (and the cell, if given) from the state items'''
        if st_cell is not None:
            self._update_rvecs(st_cell.value)
        if self.select0 is None:
            self.pos0[:] = st_pos.value
        else:
            self.pos0[:] = st_pos.value[self.select0]
        if self.select1 is not None:
            self.pos1[:] = st_pos.value[self.select1]

    def read_offline(self, i, ds_pos, ds_cell=None):
        '''Read the selected positions (and the cell, if given) of time step i'''
        if ds_cell is not None:
            self._update_rvecs(np.array(ds_cell[i]))
        if self.select0 is None:
            ds_pos.read_direct(self.pos0, (i,))
        else:
            ds_pos.read_direct(self.pos0, (i, self.select0))
        if self.select1 is not None:
            ds_pos.read_direct(self.pos1, (i, self.select1))

    def compute_iteration(self):
        '''Add the histogram of the current pair distances to the accumulators'''
        self.cell.compute_distances(self.work, self.pos0, self.pos1,
                                    nimage=self.nimage)
        counts = np.histogram(self.work, bins=self.bins)[0]
        normalization = self.npair / (self.cell.volume * (1 + 2 * self.nimage)**3) \
            * (4 * np.pi * self.rspacing) * self.d**2
        self.rdf_sum += counts / normalization
        self.CN_sum += counts / (self.natom0 * 4 * np.pi * self.rspacing * self.d**2)
        if self.pairs_sr is not None:
            # Only the first len(pairs_sr) elements of the work array are
            # needed for the short-range pairs.
            work_sr = self.work[:len(self.pairs_sr)]
            self.cell.compute_distances(work_sr, self.pos0, self.pos1,
                                        pairs=self.pairs_sr, do_include=True)
            counts_sr = np.histogram(work_sr, bins=self.bins)[0]
            self.rdf_sum_sr += counts_sr / normalization
        self.nsample += 1

    def compute_derived(self):
        '''Derive rdf, CN (and rdf_sr) from the accumulators and store them

           The accumulators are not modified, such that this method gives the
           same result when it is called more than once.
        '''
        # cumtrapz was removed from recent SciPy versions in favor of
        # cumulative_trapezoid. Old SciPy versions only have cumtrapz.
        try:
            from scipy.integrate import cumulative_trapezoid
        except ImportError:
            from scipy.integrate import cumtrapz as cumulative_trapezoid
        # derive the RDF and the CN
        self.rdf = self.rdf_sum / self.nsample
        cn_sum = self.CN_sum
        if self.select1 is None:
            # Use a scaled copy. An in-place scaling of self.CN_sum would be
            # applied again on every following call.
            cn_sum = cn_sum * ((1 + 2 * self.nimage)**3 * self.natom0**2 / self.npair)
        self.CN = cumulative_trapezoid(
            4 * np.pi * self.d**2 * cn_sum / self.nsample, self.d, initial=0.)
        if self.pairs_sr is not None:
            self.rdf_sr = self.rdf_sum_sr / self.nsample
        # store everything in the h5py file
        if self.outg is not None:
            self.outg['rdf'][:] = self.rdf
            self.outg['CN'][:] = self.CN
            if self.pairs_sr is not None:
                self.outg['rdf_sr'][:] = self.rdf_sr

    def plot(self, fn_png='rdf.png'):
        '''Plot the RDF (and the short-range RDF, if present) to fn_png'''
        import matplotlib.pyplot as pt
        pt.clf()
        xunit = log.length.conversion
        pt.plot(self.d / xunit, self.rdf, 'k-', drawstyle='steps-mid')
        if self.pairs_sr is not None:
            pt.plot(self.d / xunit, self.rdf_sr, 'r-', drawstyle='steps-mid')
        pt.xlabel('Distance [%s]' % log.length.notation)
        pt.ylabel('RDF')
        pt.xlim(self.bins[0] / xunit, self.bins[-1] / xunit)
        pt.savefig(fn_png)
def align_cell(self, lcs=None, swap=True):
    """Align the unit cell with respect to the Cartesian Axes frame

       **Optional Arguments:**

       lcs
            The linear combinations of the unit cell that must get aligned.
            This is a 2x3 array, where each row represents a linear
            combination of cell vectors. The first row is for alignment with
            the x-axis, second for the z-axis. The default value is::

                np.array([
                    [1, 0, 0],
                    [0, 0, 1],
                ])

       swap
            By default, the first alignment is done with the z-axis, then with
            the x-axis. The order is reversed when swap is set to False.

       The alignment of the first linear combination is always perfect. The
       alignment of the second linear combination is restricted to a plane.
       The cell is always made right-handed. The coordinates are also rotated
       with respect to the origin, but never inverted.

       When a linear combination is exactly anti-parallel to its target axis,
       the rotation axis can not be derived from a cross product. In that
       case, a rotation over 180 degrees about the other target axis is used.

       The attributes of the system are modified in-place. Note that this
       method only works on 3D periodic systems. A ``TypeError`` is raised
       otherwise.
    """
    from molmod import Rotation
    # define the target
    target = np.array([
        [1, 0, 0],
        [0, 0, 1],
    ])

    # default value for linear combination
    if lcs is None:
        lcs = target.copy()

    # The starting values
    pos = self.pos
    rvecs = self.cell.rvecs.copy()
    if rvecs.shape != (3, 3):
        raise TypeError('The align_cell method only supports 3D periodic systems.')

    # Optionally swap a cell vector if the cell is not right-handed.
    if np.linalg.det(rvecs) < 0:
        # Find a reasonable vector to swap...
        index = rvecs.sum(axis=1).argmin()
        rvecs[index] *= -1

    # Define the source
    source = np.dot(lcs, rvecs)

    # Do the swapping
    if swap:
        target = target[::-1]
        source = source[::-1]

    # auxiliary function
    def get_angle_axis(t, s, fallback):
        cos = np.dot(s, t) / np.linalg.norm(s) / np.linalg.norm(t)
        angle = np.arccos(np.clip(cos, -1, 1))
        axis = np.cross(s, t)
        if cos < 0 and np.linalg.norm(axis) == 0:
            # s and t are anti-parallel: the cross product vanishes while a
            # rotation over 180 degrees is needed. Any axis orthogonal to t
            # will do, which is what the caller provides as fallback.
            axis = np.array(fallback, dtype=float)
        return angle, axis

    # first alignment
    # (The two target axes are orthogonal, so target[1] is a valid fallback.)
    angle, axis = get_angle_axis(target[0], source[0], target[1])
    if np.linalg.norm(axis) > 0:
        r1 = Rotation.from_properties(angle, axis, False)
        pos = r1*pos
        rvecs = r1*rvecs
        source = r1*source

    # second alignment
    # Make sure the source is orthogonal to target[0]
    s1p = source[1] - target[0]*np.dot(target[0], source[1])
    # A rotation about target[0] does not undo the first alignment, hence it
    # is the only valid fallback here.
    angle, axis = get_angle_axis(target[1], s1p, target[0])
    if np.linalg.norm(axis) > 0:
        r2 = Rotation.from_properties(angle, axis, False)
        pos = r2*pos
        rvecs = r2*rvecs

    # assign
    self.pos = pos
    self.cell = Cell(rvecs)
def __init__(self, numbers, pos, scopes=None, scope_ids=None, ffatypes=None,
             ffatype_ids=None, bonds=None, rvecs=None, charges=None,
             radii=None, dipoles=None, radii2=None, masses=None):
    '''Store the description of a molecular system and derive some attributes

       **Arguments:**

       numbers
            A numpy array with atomic numbers. It must be one-dimensional.

       pos
            A numpy array (N,3) with atomic coordinates in Bohr, where N is
            the length of ``numbers``.

       **Optional arguments:**

       scopes
            A list with scope names

       scope_ids
            A list of scope indexes that links each atom with an element of
            the scopes list. If this argument is not present, while scopes is
            given, it is assumed that scopes contains a scope name for every
            atom, i.e. that it is a list with length natom. In that case, it
            will be converted automatically to a scopes list with only unique
            name together with a corresponding scope_ids array.

       ffatypes
            A list of labels of the force field atom types.

       ffatype_ids
            A list of atom type indexes that links each atom with an element
            of the list ffatypes. If this argument is not present, while
            ffatypes is given, it is assumed that ffatypes contains an atom
            type for every element, i.e. that it is a list with length natom.
            In that case, it will be converted automatically to a short
            ffatypes list with only unique elements (within each scope)
            together with a corresponding ffatype_ids array.

       bonds
            a numpy array (B,2) with atom indexes (counting starts from zero)
            to define the chemical bonds.

       rvecs
            An array whose rows are the unit cell vectors. At most three rows
            are allowed, each containing three Cartesian coordinates.

       charges
            An array of atomic charges

       radii
            An array of atomic radii that determine shape of charge
            distribution
            rho[i]=charges[i]/(sqrt(pi)radii[i]**3)*exp(-(|r-pos[i]|/radii[i])**2)

       dipoles
            An array of atomic dipoles

       radii2
            An array of atomic radii that determine shape of dipole
            distribution
            rho[i]=-(dipoles[i] dot r-pos[i])*2.0/(sqrt(pi)radii2[i]**5)*exp(-(|r-pos[i]|/radii[i])**2)

       masses
            The atomic masses (in atomic units, i.e. m_e)

       Several attributes are derived from the (optional) arguments:

       * ``cell`` contains the rvecs attribute and is an instance of the
         ``Cell`` class.

       * ``neighs1``, ``neighs2`` and ``neighs3`` are dictionaries derived
         from ``bonds`` that contain atoms that are separated 1, 2 and 3
         bonds from a given atom, respectively. This means that
         i in system.neighs3[j] is ``True`` if there are three bonds between
         atoms i and j.

       A ``ValueError`` is raised when ``numbers`` is not one-dimensional or
       when the shape of ``pos`` is not (len(numbers), 3).
    '''
    # Sanity checks on the two mandatory arrays
    ndim = len(numbers.shape)
    if ndim != 1:
        raise ValueError('Argument numbers must be a one-dimensional array.')
    expected_shape = (len(numbers), 3)
    if pos.shape != expected_shape:
        raise ValueError('The pos array must have Nx3 rows. Mismatch with numbers argument with shape (N,).')
    # Plain storage of the arguments
    self.numbers, self.pos = numbers, pos
    self.ffatypes, self.ffatype_ids = ffatypes, ffatype_ids
    self.scopes, self.scope_ids = scopes, scope_ids
    self.bonds = bonds
    # The cell vectors are wrapped in a Cell object
    self.cell = Cell(rvecs)
    self.charges, self.radii = charges, radii
    self.dipoles, self.radii2 = dipoles, radii2
    self.masses = masses
    with log.section('SYS'):
        # report some stuff
        self._init_log()
        # compute some derived attributes
        self._init_derived()
class System(object): def __init__(self, numbers, pos, scopes=None, scope_ids=None, ffatypes=None, ffatype_ids=None, bonds=None, rvecs=None, charges=None, radii=None, dipoles=None, radii2=None, masses=None): ''' **Arguments:** numbers A numpy array with atomic numbers pos A numpy array (N,3) with atomic coordinates in Bohr. **Optional arguments:** scopes A list with scope names scope_ids A list of scope indexes that links each atom with an element of the scopes list. If this argument is not present, while scopes is given, it is assumed that scopes contains a scope name for every atom, i.e. that it is a list with length natom. In that case, it will be converted automatically to a scopes list with only unique name together with a corresponding scope_ids array. ffatypes A list of labels of the force field atom types. ffatype_ids A list of atom type indexes that links each atom with an element of the list ffatypes. If this argument is not present, while ffatypes is given, it is assumed that ffatypes contains an atom type for every element, i.e. that it is a list with length natom. In that case, it will be converted automatically to a short ffatypes list with only unique elements (within each scope) together with a corresponding ffatype_ids array. bonds a numpy array (B,2) with atom indexes (counting starts from zero) to define the chemical bonds. rvecs An array whose rows are the unit cell vectors. At most three rows are allowed, each containing three Cartesian coordinates. charges An array of atomic charges radii An array of atomic radii that determine shape of charge distribution rho[i]=charges[i]/(sqrt(pi)radii[i]**3)*exp(-(|r-pos[i]|/radii[i])**2) dipoles An array of atomic dipoles radii2 An array of atomic radii that determine shape of dipole distribution rho[i]=-(dipoles[i] dot r-pos[i])*2.0/(sqrt(pi)radii2[i]**5)*exp(-(|r-pos[i]|/radii[i])**2) masses The atomic masses (in atomic units, i.e. 
m_e) Several attributes are derived from the (optional) arguments: * ``cell`` contains the rvecs attribute and is an instance of the ``Cell`` class. * ``neighs1``, ``neighs2`` and ``neighs3`` are dictionaries derived from ``bonds`` that contain atoms that are separated 1, 2 and 3 bonds from a given atom, respectively. This means that i in system.neighs3[j] is ``True`` if there are three bonds between atoms i and j. ''' if len(numbers.shape) != 1: raise ValueError('Argument numbers must be a one-dimensional array.') if pos.shape != (len(numbers), 3): raise ValueError('The pos array must have Nx3 rows. Mismatch with numbers argument with shape (N,).') self.numbers = numbers self.pos = pos self.ffatypes = ffatypes self.ffatype_ids = ffatype_ids self.scopes = scopes self.scope_ids = scope_ids self.bonds = bonds self.cell = Cell(rvecs) self.charges = charges self.radii = radii self.dipoles = dipoles self.radii2 = radii2 self.masses = masses with log.section('SYS'): # report some stuff self._init_log() # compute some derived attributes self._init_derived() def _init_log(self): if log.do_medium: log('Unit cell') log.hline() log('Number of periodic dimensions: %i' % self.cell.nvec) lengths, angles = self.cell.parameters names = 'abc' for i in xrange(len(lengths)): log('Cell parameter %5s: %10s' % (names[i], log.length(lengths[i]))) names = 'alpha', 'beta', 'gamma' for i in xrange(len(angles)): log('Cell parameter %5s: %10s' % (names[i], log.angle(angles[i]))) log.hline() log.blank() def _init_derived(self): if self.bonds is not None: self._init_derived_bonds() if self.scopes is not None: self._init_derived_scopes() elif self.scope_ids is not None: raise ValueError('The scope_ids only make sense when the scopes argument is given.') if self.ffatypes is not None: self._init_derived_ffatypes() elif self.ffatype_ids is not None: raise ValueError('The ffatype_ids only make sense when the ffatypes argument is given.') def _init_derived_bonds(self): # 1-bond neighbors self.neighs1 
= dict((i,set([])) for i in xrange(self.natom)) for i0, i1 in self.bonds: self.neighs1[i0].add(i1) self.neighs1[i1].add(i0) # 2-bond neighbors self.neighs2 = dict((i,set([])) for i in xrange(self.natom)) for i0, n0 in self.neighs1.iteritems(): for i1 in n0: for i2 in self.neighs1[i1]: # Require that there are no shorter paths than two bonds between # i0 and i2. Also avoid duplicates. if i2 > i0 and i2 not in self.neighs1[i0]: self.neighs2[i0].add(i2) self.neighs2[i2].add(i0) # 3-bond neighbors self.neighs3 = dict((i,set([])) for i in xrange(self.natom)) for i0, n0 in self.neighs1.iteritems(): for i1 in n0: for i3 in self.neighs2[i1]: # Require that there are no shorter paths than three bonds # between i0 and i3. Also avoid duplicates. if i3 != i0 and i3 not in self.neighs1[i0] and i3 not in self.neighs2[i0]: self.neighs3[i0].add(i3) self.neighs3[i3].add(i0) # report some basic stuff on screen if log.do_medium: log('Analysis of the bonds:') bond_types = {} for i0, i1 in self.bonds: key = tuple(sorted([self.numbers[i0], self.numbers[i1]])) bond_types[key] = bond_types.get(key, 0) + 1 log.hline() log(' First Second Count') for (num0, num1), count in sorted(bond_types.iteritems()): log('%6i %6i %5i' % (num0, num1, count)) log.hline() log.blank() log('Analysis of the neighbors:') log.hline() log('Number of first neighbors: %6i' % (sum(len(n) for n in self.neighs1.itervalues())/2)) log('Number of second neighbors: %6i' % (sum(len(n) for n in self.neighs2.itervalues())/2)) log('Number of third neighbors: %6i' % (sum(len(n) for n in self.neighs3.itervalues())/2)) # Collect all types of 'environments' for each element. 
This is # useful to double check the bonds envs = {} for i0 in xrange(self.natom): num0 = self.numbers[i0] nnums = tuple(sorted(self.numbers[i1] for i1 in self.neighs1[i0])) key = (num0, nnums) envs[key] = envs.get(key, 0)+1 # Print the environments on screen log.hline() log('Element Neighboring elements Count') for (num0, nnums), count in sorted(envs.iteritems()): log('%7i %20s %5i' % (num0, ','.join(str(num1) for num1 in nnums), count)) log.hline() log.blank() def _init_derived_scopes(self): if self.scope_ids is None: if len(self.scopes) != self.natom: raise TypeError('When the scope_ids are derived automatically, the length of the scopes list must match the number of atoms.') lookup = {} scopes = [] self.scope_ids = np.zeros(self.natom, int) for i in xrange(self.natom): scope = self.scopes[i] scope_id = lookup.get(scope) if scope_id is None: scope_id = len(scopes) scopes.append(scope) lookup[scope] = scope_id self.scope_ids[i] = scope_id self.scopes = scopes for scope in self.scopes: check_name(scope) # check the range of the ids if self.scope_ids.min() != 0 or self.scope_ids.max() != len(self.scopes)-1: raise ValueError('The ffatype_ids have incorrect bounds.') if log.do_medium: log('The following scopes are present in the system:') log.hline() log(' Scope ID Number of atoms') log.hline() for scope_id, scope in enumerate(self.scopes): log('%22s %3i %3i' % (scope, scope_id, (self.scope_ids==scope_id).sum())) log.hline() log.blank() def _init_derived_ffatypes(self): if self.ffatype_ids is None: if len(self.ffatypes) != self.natom: raise TypeError('When the ffatype_ids are derived automatically, the length of the ffatypes list must match the number of atoms.') lookup = {} ffatypes = [] self.ffatype_ids = np.zeros(self.natom, int) for i in xrange(self.natom): if self.scope_ids is None: ffatype = self.ffatypes[i] key = ffatype, None else: scope_id = self.scope_ids[i] ffatype = self.ffatypes[i] key = ffatype, scope_id ffatype_id = lookup.get(key) if ffatype_id is 
None: ffatype_id = len(ffatypes) ffatypes.append(ffatype) lookup[key] = ffatype_id self.ffatype_ids[i] = ffatype_id self.ffatypes = ffatypes for ffatype in self.ffatypes: check_name(ffatype) # check the range of the ids if self.ffatype_ids.min() != 0 or self.ffatype_ids.max() != len(self.ffatypes)-1: raise ValueError('The ffatype_ids have incorrect bounds.') # differentiate ffatype_ids if the same ffatype_id is used in different # scopes if self.scopes is not None: self.ffatype_id_to_scope_id = {} fixed_fids = {} for i in xrange(self.natom): fid = self.ffatype_ids[i] sid = self.ffatype_id_to_scope_id.get(fid) if sid is None: self.ffatype_id_to_scope_id[fid] = self.scope_ids[i] elif sid != self.scope_ids[i]: # We found the same ffatype_id in a different scope_id. This # must be fixed. First check if we have already a new # scope_id ready sid = self.scope_ids[i] new_fid = fixed_fids.get((sid, fid)) if new_fid is None: # No previous new fid create, do it now. new_fid = len(self.ffatypes) # Copy the ffatype label self.ffatypes.append(self.ffatypes[fid]) # Keep track of the new fid fixed_fids[(sid, fid)] = new_fid if log.do_warning: log.warn('Atoms with type ID %i in scope %s were changed to type ID %i.' 
% (fid, self.scopes[sid], new_fid)) # Apply the new fid self.ffatype_ids[i] = new_fid self.ffatype_id_to_scope_id[new_fid] = sid # Turn the ffatypes in the scopes into array if self.ffatypes is not None: self.ffatypes = np.array(self.ffatypes, copy=False) if self.scopes is not None: self.scopes = np.array(self.scopes, copy=False) # check the range of the ids if self.ffatype_ids.min() != 0 or self.ffatype_ids.max() != len(self.ffatypes)-1: raise ValueError('The ffatype_ids have incorrect bounds.') if log.do_medium: log('The following atom types are present in the system:') log.hline() if self.scopes is None: log(' Atom type ID Number of atoms') log.hline() for ffatype_id, ffatype in enumerate(self.ffatypes): log('%22s %3i %3i' % (ffatype, ffatype_id, (self.ffatype_ids==ffatype_id).sum())) else: log(' Scope Atom type ID Number of atoms') log.hline() for ffatype_id, ffatype in enumerate(self.ffatypes): scope = self.scopes[self.ffatype_id_to_scope_id[ffatype_id]] log('%22s %22s %3i %3i' % (scope, ffatype, ffatype_id, (self.ffatype_ids==ffatype_id).sum())) log.hline() log.blank() def _get_natom(self): """The number of atoms""" return len(self.pos) natom = property(_get_natom) def _get_nffatype(self): """The number of atom types""" return len(self.ffatypes) nffatype = property(_get_nffatype) def _get_nbond(self): '''The number of bonds''' if self.bonds is None: return 0 else: return len(self.bonds) nbond = property(_get_nbond) @classmethod def from_file(cls, *fns, **user_kwargs): """Construct a new System instance from one or more files **Arguments:** fn1, fn2, ... A list of filenames that are read in order. Information in later files overrides information in earlier files. **Optional arguments:** Any argument from the default constructor ``__init__``. These must be given with keywords. **Supported file formats** .xyz Standard Cartesian coordinates file (in angstroms). Atomic positions and atomic numbers are read from this file. 
If the title consists of 3, 6 or 9 numbers, each group of three numbers is interpreted as a cell vector (in angstroms). A guess of the bonds will be made based on inter-atomic distances. .psf Atom types and bonds are read from this file .chk Internal text-based checkpoint format. It just contains a dictionary with the constructor arguments. """ with log.section('SYS'): kwargs = {} for fn in fns: if fn.endswith('.xyz'): from molmod import Molecule mol = Molecule.from_file(fn) kwargs['numbers'] = mol.numbers.copy() kwargs['pos'] = mol.coordinates.copy() elif fn.endswith('.psf'): from molmod.io import PSFFile psf = PSFFile(fn) kwargs['ffatypes'] = psf.atom_types kwargs['bonds'] = np.array(psf.bonds, copy=False) kwargs['charges'] = np.array(psf.charges, copy=False) elif fn.endswith('.chk'): from molmod.io import load_chk allowed_keys = [ 'numbers', 'pos', 'scopes', 'scope_ids', 'ffatypes', 'ffatype_ids', 'bonds', 'rvecs', 'charges', 'radii', 'dipoles','radii2','masses', ] for key, value in load_chk(fn).iteritems(): if key in allowed_keys: kwargs.update({key: value}) elif fn.endswith('.h5'): with h5.File(fn, 'r') as f: return cls.from_hdf5(f) else: raise IOError('Can not read from file \'%s\'.' % fn) if log.do_high: log('Read system parameters from %s.' % fn) kwargs.update(user_kwargs) return cls(**kwargs) @classmethod def from_hdf5(cls, f): '''Create a system from an HDF5 file/group containing a system group **Arguments:** f An open h5.File object with a system group. The system group must at least contain a numbers and pos dataset. ''' sgrp = f['system'] kwargs = { 'numbers': sgrp['numbers'][:], 'pos': sgrp['pos'][:], } for key in 'scopes', 'scope_ids', 'ffatypes', 'ffatype_ids', 'bonds', 'rvecs', 'charges', 'masses': if key in sgrp: kwargs[key] = sgrp[key][:] if log.do_high: log('Read system parameters from %s.' % f.filename) return cls(**kwargs) def to_file(self, fn): """Write the system to a file **Arguments:** fn The file to write to. 
Supported formats are: chk Internal human-readable checkpoint format. This format includes all the information of a system object. All data are stored in atomic units. h5 Internal binary checkpoint format. This format includes all the information of a system object. All data are stored in atomic units. xyz A simple file with atomic positions and elements. Coordinates are written in Angstroms. """ if fn.endswith('.chk'): from molmod.io import dump_chk dump_chk(fn, { 'numbers': self.numbers, 'pos': self.pos, 'ffatypes': self.ffatypes, 'ffatype_ids': self.ffatype_ids, 'scopes': self.scopes, 'scope_ids': self.scope_ids, 'bonds': self.bonds, 'rvecs': self.cell.rvecs, 'charges': self.charges, 'masses': self.masses, }) elif fn.endswith('.h5'): with h5.File(fn, 'w') as f: self.to_hdf5(f) elif fn.endswith('.xyz'): from molmod.io import XYZWriter from molmod.periodic import periodic xyz_writer = XYZWriter(fn, [periodic[n].symbol for n in self.numbers]) xyz_writer.dump(str(self), self.pos) else: raise NotImplementedError('The extension of %s does not correspond to any known format.' % fn) if log.do_high: with log.section('SYS'): log('Wrote system to %s.' % fn) def to_hdf5(self, f): """Write the system to a HDF5 file. **Arguments:** f A Writable h5.File object. 
""" if 'system' in f: raise ValueError('The HDF5 file already contains a system description.') sgrp = f.create_group('system') sgrp.create_dataset('numbers', data=self.numbers) sgrp.create_dataset('pos', data=self.pos) if self.scopes is not None: sgrp.create_dataset('scopes', data=self.scopes, dtype='a22') sgrp.create_dataset('scope_ids', data=self.scope_ids) if self.ffatypes is not None: sgrp.create_dataset('ffatypes', data=self.ffatypes, dtype='a22') sgrp.create_dataset('ffatype_ids', data=self.ffatype_ids) if self.bonds is not None: sgrp.create_dataset('bonds', data=self.bonds) if self.cell.nvec > 0: sgrp.create_dataset('rvecs', data=self.cell.rvecs) if self.charges is not None: sgrp.create_dataset('charges', data=self.charges) if self.masses is not None: sgrp.create_dataset('masses', data=self.masses) def get_scope(self, index): """Return the of the scope (string) of atom with given index""" return self.scopes[self.scope_ids[index]] def get_ffatype(self, index): """Return the of the ffatype (string) of atom with given index""" return self.ffatypes[self.ffatype_ids[index]] def get_indexes(self, rule): """Return the atom indexes that match the filter ``rule`` ``rule`` can be a function that accepts two arguments: system and an atom index and that returns True of the atom with index i is of a given type. On the other hand ``rule`` can be an ATSELECT string that defines the atoms of interest. A list of atom indexes is returned. """ if isinstance(rule, basestring): rule = atsel_compile(rule) return np.array([i for i in xrange(self.natom) if rule(self, i)]) def iter_bonds(self): """Iterate over all bonds.""" if self.bonds is not None: for i1, i2 in self.bonds: yield i1, i2 def iter_angles(self): """Iterative over all possible valence angles. This routine is based on the attribute ``bonds``. 
""" if self.bonds is not None: for i1 in xrange(self.natom): for i0 in self.neighs1[i1]: for i2 in self.neighs1[i1]: if i0 > i2: yield i0, i1, i2 def iter_dihedrals(self): """Iterative over all possible dihedral angles. This routine is based on the attribute ``bonds``. """ if self.bonds is not None: for i1, i2 in self.bonds: for i0 in self.neighs1[i1]: if i0==i2: continue for i3 in self.neighs1[i2]: if i1==i3: continue if i0==i3: continue yield i0, i1, i2, i3 def detect_bonds(self, exceptions=None): """Initialize the ``bonds`` attribute based on inter-atomic distances **Optional argument:** exceptions: Specify custom threshold for certain pairs of elements. This must be a dictionary with ((num0, num1), threshold) as items. For each pair of elements, a distance threshold is used to detect bonded atoms. The distance threshold is based on a database of known bond lengths. If the database does not contain a record for the given element pair, the threshold is based on the sum of covalent radii. """ with log.section('SYS'): from molmod.bonds import bonds if self.bonds is not None: if log.do_warning: log.warn('Overwriting existing bonds.') work = np.zeros((self.natom*(self.natom-1))/2, float) self.cell.compute_distances(work, self.pos) ishort = (work < bonds.max_length*1.01).nonzero()[0] new_bonds = [] for i in ishort: i0, i1 = _unravel_triangular(i) n0 = self.numbers[i0] n1 = self.numbers[i1] if exceptions is not None: threshold = exceptions.get((n0, n1)) if threshold is None and n0!=n1: threshold = exceptions.get((n1, n0)) if threshold is not None: if work[i] < threshold: new_bonds.append([i0, i1]) continue if bonds.bonded(n0, n1, work[i]): new_bonds.append([i0, i1]) self.bonds = np.array(new_bonds) self._init_derived_bonds() def detect_ffatypes(self, rules): """Initialize the ``ffatypes`` attribute based on ATSELECT rules. **Argument:** rules A list of (ffatype, rule) pairs that will be used to initialize the attributes ``self.ffatypes`` and ``self.ffatype_ids``. 
If the system already has FF atom types, they will be overwritten. """ with log.section('SYS'): # Give warning if needed if self.ffatypes is not None: if log.do_warning: log.warn('Overwriting existing FF atom types.') # Compile all the rules my_rules = [] for ffatype, rule in rules: check_name(ffatype) if isinstance(rule, basestring): rule = atsel_compile(rule) my_rules.append((ffatype, rule)) # Use the rules to detect the atom types lookup = {} self.ffatypes = [] self.ffatype_ids = np.zeros(self.natom, int) for i in xrange(self.natom): my_ffatype = None for ffatype, rule in my_rules: if rule(self, i): my_ffatype = ffatype break if my_ffatype is None: raise ValueError('Could not detect FF atom type of atom %i.' % i) ffatype_id = lookup.get(my_ffatype) if ffatype_id is None: ffatype_id = len(lookup) self.ffatypes.append(my_ffatype) lookup[my_ffatype] = ffatype_id self.ffatype_ids[i] = ffatype_id # Make sure all is done well ... self._init_derived_ffatypes() def set_standard_masses(self): """Initialize the ``masses`` attribute based on the atomic numbers.""" with log.section('SYS'): from molmod.periodic import periodic if self.masses is not None: if log.do_warning: log.warn('Overwriting existing masses with default masses.') self.masses = np.array([periodic[n].mass for n in self.numbers]) def align_cell(self, lcs=None, swap=True): """Align the unit cell with respect to the Cartesian Axes frame **Optional Arguments:** lcs The linear combinations of the unit cell that must get aligned. This is a 2x3 array, where each row represents a linear combination of cell vectors. The first row is for alignment with the x-axis, second for the z-axis. The default value is:: np.array([ [1, 0, 0], [0, 0, 1], ]) swap By default, the first alignment is done with the z-axis, then with the x-axis. The order is reversed when swap is set to False. The alignment of the first linear combination is always perfect. The alignment of the second linear combination is restricted to a plane. 
The cell is always made right-handed. The coordinates are also rotated with respect to the origin, but never inverted. The attributes of the system are modified in-place. Note that this method only works on 3D periodic systems. """ from molmod import Rotation, deg # define the target target = np.array([ [1, 0, 0], [0, 0, 1], ]) # default value for linear combination if lcs is None: lcs = target.copy() # The starting values pos = self.pos rvecs = self.cell.rvecs.copy() if rvecs.shape != (3,3): raise TypeError('The align_cell method only supports 3D periodic systems.') # Optionally swap a cell vector if the cell is not right-handed. if np.linalg.det(rvecs) < 0: # Find a reasonable vector to swap... index = rvecs.sum(axis=1).argmin() rvecs[index] *= -1 # Define the source source = np.dot(lcs, rvecs) # Do the swapping if swap: target = target[::-1] source = source[::-1] # auxiliary function def get_angle_axis(t, s): cos = np.dot(s, t)/np.linalg.norm(s)/np.linalg.norm(t) angle = np.arccos(np.clip(cos, -1, 1)) axis = np.cross(s, t) return angle, axis # first alignment angle, axis = get_angle_axis(target[0], source[0]) if np.linalg.norm(axis) > 0: r1 = Rotation.from_properties(angle, axis, False) pos = r1*pos rvecs = r1*rvecs source = r1*source # second alignment # Make sure the source is orthogonal to target[0] s1p = source[1] - target[0]*np.dot(target[0], source[1]) angle, axis = get_angle_axis(target[1], s1p) r2 = Rotation.from_properties(angle, axis, False) pos = r2*pos rvecs = r2*rvecs # assign self.pos = pos self.cell = Cell(rvecs) def supercell(self, *reps): """Return a supercell of the system. **Arguments:** reps An array with repetitions, which must have the same number of elements as the number of cell vectors. If this method is called with a non-periodic system, a TypeError is raised. 
""" if self.cell.nvec == 0: raise TypeError('Can not create a supercell of a non-periodic system.') if self.cell.nvec != len(reps): raise TypeError('The number of repetitions must match the number of cell vectors.') if not isinstance(reps, tuple): raise TypeError('The reps argument must be a tuple') # A dictionary with new arguments for the construction of the supercell new_args = {} # A) No repetitions if self.ffatypes is not None: new_args['ffatypes'] = self.ffatypes.copy() if self.scopes is not None: new_args['scopes'] = self.scopes.copy() # B) Simple repetitions rep_all = np.product(reps) for attrname in 'numbers', 'ffatype_ids', 'scope_ids', 'charges', 'radii', 'radii2', 'masses': value = getattr(self, attrname) if value is not None: new_args[attrname] = np.tile(value, rep_all) attrname = 'dipoles' value = getattr(self, attrname) if value is not None: new_args[attrname] = np.tile(value, (rep_all, 1)) # C) Cell vectors new_args['rvecs'] = self.cell.rvecs*np.array(reps)[:,None] # D) Atom positions new_pos = np.zeros((self.natom*rep_all, 3), float) start = 0 for iimage in np.ndindex(reps): stop = start+self.natom new_pos[start:stop] = self.pos + np.dot(iimage, self.cell.rvecs) start = stop new_args['pos'] = new_pos if self.bonds is not None: # E) Bonds # E.1) A function that translates a set of image indexes and an old atom # index into a new atom index offsets = {} start = 0 for iimage in np.ndindex(reps): offsets[iimage] = start start += self.natom def to_new_atom_index(iimage, i): return offsets[iimage] + i # E.2) Construct extended bond information: for each bond, also keep # track of periodic image it connects to. Note that this information # is implicit in yaff, and derived using the minimum image convention. 
rel_iimage = {} for ibond in xrange(len(self.bonds)): i0, i1 = self.bonds[ibond] delta = self.pos[i0] - self.pos[i1] frac = np.dot(self.cell.gvecs, delta) rel_iimage[ibond] = np.ceil(frac-0.5) # E.3) Create the new bonds new_bonds = np.zeros((len(self.bonds)*rep_all,2), int) counter = 0 for iimage0 in np.ndindex(reps): for ibond in xrange(len(self.bonds)): i0, i1 = self.bonds[ibond] # Translate i0 to the new index. j0 = to_new_atom_index(iimage0, i0) # Also translate i1 to the new index. This is a bit more tricky. # The difficult case occurs when the bond between i0 and i1 # connects different periodic images. In that case, the change # in periodic image must be taken into account. iimage1 = tuple((iimage0[c] + rel_iimage[ibond][c]) % reps[c] for c in xrange(len(reps))) j1 = to_new_atom_index(iimage1, i1) new_bonds[counter,0] = j0 new_bonds[counter,1] = j1 counter += 1 new_args['bonds'] = new_bonds # Done return System(**new_args) def remove_duplicate(self, threshold=0.1): '''Return a system object in which the duplicate atoms and bonds are removed. **Optional argument:** threshold The minimum distance between two atoms that are supposed to be different. When it makes sense, properties of overlapping atoms are averaged out. In other cases, the atom with the lowest index in a cluster of overlapping atoms defines the new value of a property. ''' # compute distances ndist = (self.natom*(self.natom-1))/2 if ndist == 0: # single atom systems, go home ... 
return dists = np.zeros(ndist) self.cell.compute_distances(dists, self.pos) # find clusters of overlapping atoms from molmod import ClusterFactory cf = ClusterFactory() counter = 0 for i0 in xrange(self.natom): for i1 in xrange(i0): if dists[counter] < threshold: cf.add_related(i0, i1) counter += 1 clusters = [c.items for c in cf.get_clusters()] # make a mapping from new to old atoms newold = {} oldnew = {} counter = 0 for cluster in clusters: # all merged atoms come first newold[counter] = sorted(cluster) for item in cluster: oldnew[item] = counter counter += 1 if len(clusters) > 0: old_reduced = set.union(*clusters) else: old_reduced = [] for item in xrange(self.natom): # all remaining atoms follow if item not in old_reduced: newold[counter] = [item] oldnew[item] = counter counter += 1 natom = len(newold) def reduce_int_array(old): if old is None: return None else: new = np.zeros(natom, old.dtype) for inew, iolds in newold.iteritems(): new[inew] = old[iolds[0]] return new def reduce_float_array(old): if old is None: return None else: new = np.zeros(natom, old.dtype) for inew, iolds in newold.iteritems(): new[inew] = old[iolds].mean() return new def reduce_float_matrix(old): '''Reduce array with dim=2''' if old is None: return None else: new = np.zeros((natom,np.shape(old)[1]), old.dtype) for inew, iolds in newold.iteritems(): new[inew] = old[iolds].mean(axis=0) return new # trivial cases numbers = reduce_int_array(self.numbers) scope_ids = reduce_int_array(self.scope_ids) ffatype_ids = reduce_int_array(self.ffatype_ids) charges = reduce_float_array(self.charges) radii = reduce_float_array(self.radii) dipoles = reduce_float_matrix(self.dipoles) radii2 = reduce_float_array(self.radii2) masses = reduce_float_array(self.masses) # create averaged positions pos = np.zeros((natom, 3), float) for inew, iolds in newold.iteritems(): # move to the same image oldposs = self.pos[iolds].copy() assert oldposs.ndim == 2 ref = oldposs[0] for oldpos in oldposs[1:]: delta = 
oldpos-ref self.cell.mic(delta) oldpos[:] = delta+ref # compute mean position pos[inew] = oldposs.mean(axis=0) # create reduced list of bonds if self.bonds is None: bonds = None else: bonds = set((oldnew[ia], oldnew[ib]) for ia, ib in self.bonds) bonds = np.array([bond for bond in bonds]) return self.__class__(numbers, pos, self.scopes, scope_ids, self.ffatypes, ffatype_ids, bonds, self.cell.rvecs, charges, radii, dipoles, radii2, masses) def subsystem(self, indexes): '''Return a System instance in which only the given atom are retained.''' def reduce_array(old): if old is None: return None else: new = np.zeros((len(indexes),) + old.shape[1:], old.dtype) for inew, iold in enumerate(indexes): new[inew] = old[iold] return new def reduce_scopes(): if self.scopes is None: return None else: return [self.get_scope(i) for i in indexes] def reduce_ffatypes(): if self.ffatypes is None: return None else: return [self.get_ffatype(i) for i in indexes] def reduce_bonds(old): translation = dict((iold, inew) for inew, iold in enumerate(indexes)) new = [] for old0, old1 in old: new0 = translation.get(old0) new1 = translation.get(old1) if not (new0 is None or new1 is None): new.append([new0, new1]) return new return System( numbers=reduce_array(self.numbers), pos=reduce_array(self.pos), scopes=reduce_scopes(), ffatypes=reduce_ffatypes(), bonds=reduce_bonds(self.bonds), rvecs=self.cell.rvecs, charges=reduce_array(self.charges), radii=reduce_array(self.radii), dipoles=reduce_array(self.dipoles), radii2=reduce_array(self.radii2), masses=reduce_array(self.masses), ) def cut_bonds(self, indexes): '''Remove all bonds of a fragment with the remainder of the system; **Arguments:** indexes The atom indexes in the fragment ''' new_bonds = [] indexes = set(indexes) for i0, i1 in self.bonds: if not ((i0 in indexes) ^ (i1 in indexes)): new_bonds.append([i0, i1]) self.bonds = np.array(new_bonds) def to_file(self, fn): """Write the system to a file **Arguments:** fn The file to write to. 
Supported formats are: chk Internal human-readable checkpoint format. This format includes all the information of a system object. All data are stored in atomic units. h5 Internal binary checkpoint format. This format includes all the information of a system object. All data are stored in atomic units. xyz A simple file with atomic positions and elements. Coordinates are written in Angstroms. """ if fn.endswith('.chk'): from molmod.io import dump_chk dump_chk(fn, { 'numbers': self.numbers, 'pos': self.pos, 'ffatypes': self.ffatypes, 'ffatype_ids': self.ffatype_ids, 'scopes': self.scopes, 'scope_ids': self.scope_ids, 'bonds': self.bonds, 'rvecs': self.cell.rvecs, 'charges': self.charges, 'radii': self.radii, 'dipoles': self.dipoles, 'radii2': self.radii2, 'masses': self.masses, }) elif fn.endswith('.h5'): with h5.File(fn, 'w') as f: self.to_hdf5(f) elif fn.endswith('.xyz'): from molmod.io import XYZWriter from molmod.periodic import periodic xyz_writer = XYZWriter(fn, [periodic[n].symbol for n in self.numbers]) xyz_writer.dump(str(self), self.pos) else: raise NotImplementedError('The extension of %s does not correspond to any known format.' % fn) if log.do_high: with log.section('SYS'): log('Wrote system to %s.' % fn) def to_hdf5(self, f): """Write the system to a HDF5 file. **Arguments:** f A Writable h5.File object. 
""" if 'system' in f: raise ValueError('The HDF5 file already contains a system description.') sgrp = f.create_group('system') sgrp.create_dataset('numbers', data=self.numbers) sgrp.create_dataset('pos', data=self.pos) if self.scopes is not None: sgrp.create_dataset('scopes', data=self.scopes, dtype='a22') sgrp.create_dataset('scope_ids', data=self.scope_ids) if self.ffatypes is not None: sgrp.create_dataset('ffatypes', data=self.ffatypes, dtype='a22') sgrp.create_dataset('ffatype_ids', data=self.ffatype_ids) if self.bonds is not None: sgrp.create_dataset('bonds', data=self.bonds) if self.cell.nvec > 0: sgrp.create_dataset('rvecs', data=self.cell.rvecs) if self.charges is not None: sgrp.create_dataset('charges', data=self.charges) if self.radii is not None: sgrp.create_dataset('radii', data=self.radii) if self.dipoles is not None: sgrp.create_dataset('dipoles', data=self.dipoles) if self.radii2 is not None: sgrp.create_dataset('radii2', data=self.radii2) if self.masses is not None: sgrp.create_dataset('masses', data=self.masses)
def __init__(self, system, pppm_accuracy=1e-5, fn_log='lammps.log',
             scalings=np.zeros(6), fn_system='lammps.data',
             fn_table='lammps.table', triclinic=True, comm=None):
    '''Set up a force part whose energy is evaluated by LAMMPS.

       **Arguments:**

       system
            An instance of the ``System`` class.

       **Optional Arguments:**

       pppm_accuracy
            Desired relative error in electrostatic forces.
            Default: 1e-5

       fn_log
            Filename where LAMMPS output is stored.
            Default: 'lammps.log'

       scalings
            Numpy array [6x1] containing the scaling factors for
            1-2, 1-3, 1-4 Lennard-Jones and 1-2, 1-3, 1-4 electrostatic
            interactions.
            Default: [0.0,0.0,0.0,0.0,0.0,0.0]

       fn_system
            Filename of file containing system information, can be
            written using the ```write_lammps_data``` method.
            Default: lammps.data

       fn_table
            Filename of file containing tabulated non-bonded potential
            without charges, can be written using the
            ```write_lammps_table``` method.
            Default: lammps.table

       triclinic
            Boolean, specify whether a triclinic cell will be used during
            the simulation. If the cell is orthogonal, set it to False
            as LAMMPS should run slightly faster.
            Default: True

       comm
            MPI communicator, required if LAMMPS should run in parallel

       A ValueError is raised when the system is not 3D periodic or when
       one of the two input files does not exist.
    '''
    if system.cell.nvec != 3:
        raise ValueError(
            'The system must be 3d periodic for Lammps calculations.')
    # Both input files must exist before LAMMPS is started; the system file
    # is checked first, then the table file.
    for fn_required in (fn_system, fn_table):
        if not os.path.isfile(fn_required):
            raise ValueError('Could not read file %s' % fn_required)
    ForcePart.__init__(self, 'lammps', system)
    self.system = system
    self.comm = comm
    self.triclinic = triclinic
    self.setup_lammps(fn_system, fn_table, pppm_accuracy, fn_log, scalings)
    # LAMMPS needs cell vectors (ax,0,0), (bx,by,0) and (cx,cy,cz), so a
    # rotation is required to switch between Yaff and LAMMPS coordinates.
    # The attributes below hold everything related to that rotation; they
    # start out as placeholders.
    self.rvecs = np.eye(3)
    self.cell = Cell(self.rvecs)
    self.rot = np.zeros((3, 3))
class ForcePartLammps(ForcePart):
    '''Energies obtained from Lammps.'''

    def __init__(self, system, pppm_accuracy=1e-5, fn_log='lammps.log',
                 scalings=np.zeros(6), fn_system='lammps.data',
                 fn_table='lammps.table', triclinic=True, comm=None):
        '''
           **Arguments:**

           system
                An instance of the ``System`` class.

           **Optional Arguments:**

           scalings
                Numpy array [6x1] containing the scaling factors for
                1-2, 1-3, 1-4 Lennard-Jones and 1-2, 1-3, 1-4 electrostatic
                interactions.
                Default: [0.0,0.0,0.0,0.0,0.0,0.0]

           pppm_accuracy
                Desired relative error in electrostatic forces
                Default: 1e-5

           fn_log
                Filename where LAMMPS output is stored.
                Default: 'lammps.log'

           fn_system
                Filename of file containing system information, can be
                written using the ```write_lammps_data``` method.
                Default: lammps.data

           fn_table
                Filename of file containing tabulated non-bonded potential
                without charges, can be written using the
                ```write_lammps_table``` method.
                Default: lammps.table

           triclinic
                Boolean, specify whether a triclinic cell will be used during
                the simulation. If the cell is orthogonal, set it to False
                as LAMMPS should run slightly faster.
                Default: True

           comm
                MPI communicator, required if LAMMPS should run in parallel

           A ValueError is raised when the system is not 3D periodic or when
           ``fn_system`` or ``fn_table`` does not exist.
        '''
        if system.cell.nvec != 3:
            raise ValueError(
                'The system must be 3d periodic for Lammps calculations.')
        if not os.path.isfile(fn_system):
            raise ValueError('Could not read file %s' % fn_system)
        if not os.path.isfile(fn_table):
            raise ValueError('Could not read file %s' % fn_table)
        ForcePart.__init__(self, 'lammps', system)
        self.system = system
        self.comm = comm
        self.triclinic = triclinic
        self.setup_lammps(fn_system, fn_table, pppm_accuracy, fn_log, scalings)
        # LAMMPS needs cell vectors (ax,0,0), (bx,by,0) and (cx,cy,cz)
        # This means we need to perform a rotation to switch between Yaff and
        # LAMMPS coordinates. All information about this rotation is stored
        # in the variables defined below
        self.rvecs = np.eye(3)
        self.cell = Cell(self.rvecs)
        self.rot = np.zeros((3, 3))

    def setup_lammps(self, fn_system, fn_table, pppm_accuracy, fn_log,
                     scalings):
        '''
            Pass all commands that would normally appear in the LAMMPS input
            file to our instance of LAMMPS.

            **Arguments:**

            fn_system
                Data file handed to the ``read_data`` command.

            fn_table
                File with the tabulated pair potentials; one table section
                named ``<ffatype>-<ffatype>`` is requested per pair of atom
                types.

            pppm_accuracy
                Accuracy passed to ``kspace_style pppm`` (only used when
                the system has charges).

            fn_log
                File that receives the LAMMPS screen output.

            scalings
                Six scaling factors; the first three go to the ``lj`` part of
                ``special_bonds``, the last three to the ``coul`` part (only
                when the system has charges).
        '''
        self.lammps = lammps(name='mpi', comm=self.comm,
                             cmdargs=["-screen", fn_log, "-log", "none"])
        nffa = self.system.ffatypes.shape[0]
        self.lammps.command("units electron")
        self.lammps.command("atom_style full")
        self.lammps.command("atom_modify map array")
        self.lammps.command("read_data %s" % fn_system)
        self.lammps.command("mass * 1.0")
        self.lammps.command("bond_style none")
        if self.system.charges is not None:
            self.lammps.command(
                "pair_style hybrid/overlay coul/long 28.4 table spline 5000")
            self.lammps.command("pair_coeff * * coul/long")
            # Scientific notation is used on purpose: '%f' prints only six
            # decimals, which turns any accuracy below 5e-7 into 0.000000.
            self.lammps.command("kspace_style pppm %e" % pppm_accuracy)
        else:
            self.lammps.command("pair_style table spline 2000")
        for i in xrange(nffa):
            ffai = self.system.ffatypes[i]
            for j in xrange(i, nffa):
                ffaj = self.system.ffatypes[j]
                # The table section name always lists the larger atom type
                # (in string comparison) first.
                if ffai > ffaj:
                    name = '%s-%s' % (ffai, ffaj)
                else:
                    name = '%s-%s' % (ffaj, ffai)
                # LAMMPS atom types are counted from one.
                self.lammps.command("pair_coeff %d %d table %s %s" %
                                    (i + 1, j + 1, fn_table, name))
        if self.system.charges is not None:
            self.lammps.command(
                "special_bonds lj %f %f %f coul %f %f %f" %
                (scalings[0], scalings[1], scalings[2],
                 scalings[3], scalings[4], scalings[5]))
        else:
            self.lammps.command("special_bonds lj %f %f %f" %
                                (scalings[0], scalings[1], scalings[2]))
        self.lammps.command("neighbor 0.0 bin")
        self.lammps.command("neigh_modify delay 0 every 1 check no")
        self.lammps.command("variable eng equal pe")
        self.lammps.command("compute virial all pressure NULL virial")
        self.lammps.command("fix 1 all nve")

    def update_rot(self):
        '''Recompute ``self.rot``, the transformation from Yaff to LAMMPS
           coordinates, from the current Yaff and LAMMPS cell vectors.
        '''
        self.rot[:] = 0.0
        A = self.system.cell.rvecs[0, :]
        B = self.system.cell.rvecs[1, :]
        C = self.system.cell.rvecs[2, :]
        # Rows are the cross products of pairs of Yaff cell vectors; the
        # division by the cell volume happens in the last statement.
        self.rot[0, :] = np.cross(B, C)
        self.rot[1, :] = np.cross(C, A)
        self.rot[2, :] = np.cross(A, B)
        self.rot = np.dot(self.rvecs.transpose(),
                          self.rot) / self.system.cell.volume

    def update_pos(self, pos):
        '''
            Update the LAMMPS positions based on the coordinates from Yaff

            **Arguments:**

            pos
                Array with Yaff coordinates. It is overwritten in-place with
                the rotated coordinates, so callers should pass a copy.
        '''
        # Perform the rotation; 'ij,kj' is the product of pos with the
        # transpose of rot.
        pos[:] = np.einsum('ij,kj', pos, self.rot)
        # TODO: check if mic is necessary or not
        self.lammps.scatter_atoms("x", 1, 3, ctypes.c_void_p(pos.ctypes.data))

    def update_rvecs(self, rvecs):
        '''Pass new cell vectors to LAMMPS and refresh the rotation matrix.

           **Arguments:**

           rvecs
                The current Yaff cell vectors.
        '''
        # Find cell vectors in LAMMPS format
        give_lower(rvecs, self.rvecs)
        self.cell.update_rvecs(self.rvecs)
        # Update the corresponding rotation matrix
        self.update_rot()
        if self.triclinic:
            self.lammps.command(
                "change_box all x final %f %30.20f y final %f %30.20f "
                "z final %f %30.20f xy final %30.20f xz final %30.20f "
                "yz final %30.20f\n" %
                (0.0, self.rvecs[0, 0], 0.0, self.rvecs[1, 1],
                 0.0, self.rvecs[2, 2], self.rvecs[1, 0],
                 self.rvecs[2, 0], self.rvecs[2, 1]))
        else:
            self.lammps.command(
                "change_box all x final %f %30.20f y final %f %30.20f "
                "z final %f %30.20f\n" %
                (0.0, self.rvecs[0, 0], 0.0, self.rvecs[1, 1],
                 0.0, self.rvecs[2, 2]))

    def _internal_compute(self, gpos, vtens):
        '''Run a zero-step LAMMPS calculation and return the energy.

           **Arguments:**

           gpos
                When not None, an array that is filled in-place with the
                gradient of the energy (minus the LAMMPS forces, rotated
                back to Yaff coordinates).

           vtens
                When not None, a 3x3 array that is filled in-place with the
                virial tensor in Yaff coordinates.
        '''
        with timer.section("LAMMPS overhead"):
            self.update_rvecs(self.system.cell.rvecs)
            # A copy is passed because update_pos rotates its argument
            # in-place.
            self.update_pos(self.system.pos.copy())
        with timer.section("LAMMPS"):
            self.lammps.command("run 0 post no")
        with timer.section("LAMMPS overhead"):
            energy = self.lammps.extract_variable("eng", None, 0)
            if gpos is not None:
                f = self.lammps.gather_atoms("f", 1, 3)
                # View the ctypes array as 3*natom doubles of 8 bytes each.
                raw = np.core.multiarray.int_asbuffer(
                    ctypes.addressof(f), 8 * 3 * self.system.natom)
                gpos[:] = np.frombuffer(raw, float).reshape((-1, 3))
                # Forces to gradient (minus sign) and back to Yaff
                # coordinates.
                gpos[:] = -np.einsum('ij,kj', gpos, self.rot.transpose())
            if vtens is not None:
                w = self.lammps.extract_compute("virial", 0, 1)
                raw = np.core.multiarray.int_asbuffer(
                    ctypes.addressof(w.contents), 8 * 6)
                vtens_lammps = np.frombuffer(raw, float)
                # Lammps gives the virial per volume in pascal, so we have to
                # multiply with some prefactors
                vtens_lammps[:] *= -pascal * self.system.cell.volume
                # The [6x1] vector has to be cast to a symmetric [3x3] tensor
                # Lammps orders the values as [xx,yy,zz,xy,xz,yz]
                vtens[np.triu_indices(3)] = vtens_lammps[[0, 3, 4, 1, 5, 2]]
                vtens[np.tril_indices(3)] = vtens_lammps[[0, 3, 1, 4, 5, 2]]
                # Finally we have to compute the effect of the rotation on the
                # the virial tensor to get the values in Yaff coordinates
                vtens[:] = np.dot(self.rot.transpose(),
                                  np.dot(vtens[:], self.rot))
        return energy
def __init__(self, numbers, pos, scopes=None, scope_ids=None, ffatypes=None,
             ffatype_ids=None, bonds=None, rvecs=None, charges=None,
             radii=None, valence_charges=None, dipoles=None, radii2=None,
             masses=None):
    r'''Initialize a System object.

       **Arguments:**

       numbers
            A numpy array with atomic numbers

       pos
            A numpy array (N,3) with atomic coordinates in Bohr.

       **Optional arguments:**

       scopes
            A list with scope names

       scope_ids
            A list of scope indexes that links each atom with an element of
            the scopes list. If this argument is not present, while scopes
            is given, it is assumed that scopes contains a scope name for
            every atom, i.e. that it is a list with length natom. In that
            case, it will be converted automatically to a scopes list
            with only unique name together with a corresponding scope_ids
            array.

       ffatypes
            A list of labels of the force field atom types.

       ffatype_ids
            A list of atom type indexes that links each atom with an element
            of the list ffatypes. If this argument is not present, while
            ffatypes is given, it is assumed that ffatypes contains an
            atom type for every element, i.e. that it is a list with length
            natom. In that case, it will be converted automatically to
            a short ffatypes list with only unique elements (within each
            scope) together with a corresponding ffatype_ids array.

       bonds
            a numpy array (B,2) with atom indexes (counting starts from
            zero) to define the chemical bonds.

       rvecs
            An array whose rows are the unit cell vectors. At most three
            rows are allowed, each containing three Cartesian coordinates.

       charges
            An array of atomic charges

       radii
            An array of atomic radii, :math:`R_{A,c}`, that determine shape
            of the atomic charge distribution:

            .. math::

                \rho_{A,c}(\mathbf{r}) = \frac{q_A}{\pi^{3/2}R_{A,c}^3} \exp\left(
                -\frac{|r - \mathbf{R}_A|^2}{R_{A,c}^2}
                \right)

       valence_charges
            In case a point-core + distribute valence charge is used, this
            vector contains the valence charges. The core charges can be
            computed by subtracting the valence charges from the net
            charges.

       dipoles
            An array of atomic dipoles

       radii2
            An array of atomic radii, :math:`R_{A,d}`, that determine shape
            of the atomic dipole distribution:

            .. math::

               \rho_{A,d}(\mathbf{r}) = -2\frac{\mathbf{d}_A \cdot (\mathbf{r} - \mathbf{R}_A)}{
               \sqrt{\pi} R_{A,d}^5
               }\exp\left(
               -\frac{|r - \mathbf{R}_A|^2}{R_{A,d}^2}
               \right)

       masses
            The atomic masses (in atomic units, i.e. m_e)

       Several attributes are derived from the (optional) arguments:

       * ``cell`` contains the rvecs attribute and is an instance of the
         ``Cell`` class.

       * ``neighs1``, ``neighs2`` and ``neighs3`` are dictionaries derived
         from ``bonds`` that contain atoms that are separated 1, 2 and 3
         bonds from a given atom, respectively. This means that i in
         system.neighs3[j] is ``True`` if there are three bonds between
         atoms i and j.

       A ValueError is raised when ``numbers`` is not one-dimensional or
       when the shape of ``pos`` is not (len(numbers), 3).
    '''
    # Validate the two mandatory arrays before anything is stored.
    if len(numbers.shape) != 1:
        raise ValueError('Argument numbers must be a one-dimensional array.')
    expected_pos_shape = (len(numbers), 3)
    if pos.shape != expected_pos_shape:
        raise ValueError('The pos array must have Nx3 rows. Mismatch with numbers argument with shape (N,).')

    # Atoms and geometry
    self.numbers = numbers
    self.pos = pos
    # Atom typing
    self.ffatypes = ffatypes
    self.ffatype_ids = ffatype_ids
    self.scopes = scopes
    self.scope_ids = scope_ids
    # Topology and periodicity
    self.bonds = bonds
    self.cell = Cell(rvecs)
    # Electrostatics and masses
    self.charges = charges
    self.radii = radii
    self.valence_charges = valence_charges
    self.dipoles = dipoles
    self.radii2 = radii2
    self.masses = masses

    with log.section('SYS'):
        # First report on the system, then build the derived attributes.
        self._init_log()
        self._init_derived()
class RDF(AnalysisHook):
    def __init__(self, rcut, rspacing, f=None, start=0, end=-1,
                 max_sample=None, step=None, select0=None, select1=None,
                 pairs_sr=None, nimage=0, pospath='trajectory/pos',
                 poskey='pos', cellpath=None, cellkey=None, outpath=None):
        """Computes a radial distribution function (RDF)

           **Argument:**

           rcut
                The cutoff for the RDF analysis. This should be lower than the
                spacing between the primitive cell planes, multiplied by
                (1+2*nimage).

           rspacing
                The width of the bins to build up the RDF.

           **Optional arguments:**

           f
                An h5.File instance containing the trajectory data. If ``f``
                is not given, or it does not contain the dataset referred
                to with the ``pospath`` argument, an on-line analysis is
                carried out.

           start, end, max_sample, step
                arguments to setup the selection of time slices. See
                ``get_slice`` for more information.

           select0
                A list of atom indexes that are considered for the computation
                of the rdf. If not given, all atoms are used.

           select1
                A list of atom indexes that are needed to compute an RDF
                between two disjoint sets of atoms. (If there is some overlap
                between select0 and select1, an error will be raised.) If this
                is None, an 'internal' RDF will be computed for the atoms
                specified in select0.

           pairs_sr
                An array with short-range pairs of atoms (shape K x 2). When
                given, an additional RDFs is generated for the short-range
                pairs (rdf_sr).

           nimage
                The number of cell images to consider in the computation of
                the pair distances. By default, this is zero, meaning that
                only the minimum image convention is used.

           pospath
                The path of the dataset that contains the time dependent data
                in the HDF5 file. The first axis of the array must be the time
                axis. This is only needed for an off-line analysis

           poskey
                In case of an on-line analysis, this is the key of the state
                item that contains the data from which the RDF is derived.

           cellpath
                The path the time-dependent cell vector data. This is only
                needed when the cell parameters are variable and the analysis
                is off-line.

           cellkey
                The key of the stateitem that contains the cell vectors. This
                is only needed when the cell parameters are variable and the
                analysis is done on-line.

           outpath
                The output path for the RDF computation in the HDF5 file. If
                not given, it defaults to '%s_rdf' % pospath. If this path
                already exists, it will be removed first.

           When f is None, or when the path does not exist in the HDF5 file,
           the class can be used as an on-line analysis hook for the iterative
           algorithms in yaff.sampling package. This means that the RDF is
           built up as the iterative algorithm progresses. The end option is
           ignored and max_sample is not applicable to an on-line analysis.

           A ValueError is raised when a selection is empty or contains
           duplicates, when select0 and select1 overlap, or when select1 is
           given without select0.
        """
        if select0 is not None:
            if len(select0) != len(set(select0)):
                raise ValueError('No duplicates are allowed in select0')
            if len(select0) == 0:
                raise ValueError('select0 can not be an empty list')
        if select1 is not None:
            if len(select1) != len(set(select1)):
                raise ValueError('No duplicates are allowed in select1')
            if len(select1) == 0:
                raise ValueError('select1 can not be an empty list')
        if select0 is not None and select1 is not None and \
           len(select0) + len(select1) != len(set(select0) | set(select1)):
            raise ValueError('No overlap is allowed between select0 and select1. If you want to compute and RDF within a set of atoms, omit the select1 argument.')
        if select0 is None and select1 is not None:
            raise ValueError('select1 can not be given without select0.')
        self.rcut = rcut
        self.rspacing = rspacing
        self.select0 = select0
        self.select1 = select1
        # Must come after select0/select1 are stored: the pairs are
        # translated to indexes within those selections.
        self.pairs_sr = self._process_pairs_sr(pairs_sr)
        self.nimage = nimage
        self.nbin = int(self.rcut/self.rspacing)
        # Bin edges and bin centers
        self.bins = np.arange(self.nbin+1)*self.rspacing
        self.d = self.bins[:-1] + 0.5*self.rspacing
        self.rdf_sum = np.zeros(self.nbin, float)
        if self.pairs_sr is not None:
            self.rdf_sum_sr = np.zeros(self.nbin, float)
        self.nsample = 0
        if outpath is None:
            outpath = pospath + '_rdf'
        analysis_inputs = {'pos': AnalysisInput(pospath, poskey),
                           'cell': AnalysisInput(cellpath, cellkey, False)}
        AnalysisHook.__init__(self, f, start, end, max_sample, step,
                              analysis_inputs, outpath, False)

    def _process_pairs_sr(self, pairs_sr):
        '''Process the short-range pairs

           The following modifications are made to the list of short-range
           pairs:

           - The pairs that do not fit in select0 (and select1) are left out.
           - Atom indexes are replaced by positions within select0 (and
             select1). When select0 is None, all atoms are used and the atom
             indexes are kept as they are.
           - The list is properly sorted.

           Note that the argument pairs_sr provided to the constructor is not
           modified in-place. It is therefore safe to reuse it for another RDF
           analysis.

           Returns None when pairs_sr is None, otherwise a numpy array with
           the processed pairs.
        '''
        if pairs_sr is None:
            return None
        # A lookup table of None stands for the identity mapping (no
        # selection, every atom index maps onto itself).
        if self.select0 is None:
            index0 = None
        else:
            index0 = dict((atom0, i0) for i0, atom0 in enumerate(self.select0))
        if self.select1 is None:
            index1 = index0
        else:
            index1 = dict((atom1, i1) for i1, atom1 in enumerate(self.select1))

        def lookup(index, atom):
            '''Translate an atom index; None when it is not selected.'''
            if index is None:
                return int(atom)
            return index.get(atom)

        my_pairs_sr = []
        for atom0, atom1 in pairs_sr:
            i0 = lookup(index0, atom0)
            i1 = lookup(index1, atom1)
            if i0 is None or i1 is None:
                # Try the pair in the opposite order.
                i0 = lookup(index0, atom1)
                i1 = lookup(index1, atom0)
            if i0 is None or i1 is None:
                continue
            # For an internal RDF, the first index is always the larger one.
            if self.select1 is None and i0 < i1:
                i0, i1 = i1, i0
            my_pairs_sr.append((i0, i1))
        if len(my_pairs_sr) > 0:
            my_pairs_sr.sort()
        return np.array(my_pairs_sr)

    def _update_rvecs(self, rvecs):
        '''Store a new cell and check that it is compatible with rcut.

           A ValueError is raised when the cell is not 3D periodic or when
           2*rcut exceeds any cell spacing multiplied by (1+2*nimage).
        '''
        self.cell = Cell(rvecs)
        if self.cell.nvec != 3:
            raise ValueError('RDF can only be computed for 3D periodic systems.')
        if (2*self.rcut > self.cell.rspacings*(1+2*self.nimage)).any():
            raise ValueError('The 2*rcut argument should not exceed any of the cell spacings.')

    def configure_online(self, iterative, st_pos, st_cell=None):
        '''Take the number of atoms and the cell from the iterative object.'''
        self.natom = iterative.ff.system.natom
        self._update_rvecs(iterative.ff.system.cell.rvecs)

    def configure_offline(self, ds_pos, ds_cell=None):
        '''Take the number of atoms (and a fixed cell) from the HDF5 file.'''
        if ds_cell is None:
            # In this case, we have a unit cell that does not change shape.
            # It must be configured just once.
            if 'rvecs' in self.f['system']:
                self._update_rvecs(self.f['system/rvecs'][:])
            else:
                self._update_rvecs(None)
        # get the total number of atoms
        self.natom = self.f['system/numbers'].shape[0]

    def init_first(self):
        '''Setup some work arrays'''
        # determine the number of atoms
        if self.select0 is None:
            self.natom0 = self.natom
        else:
            self.natom0 = len(self.select0)
        self.pos0 = np.zeros((self.natom0, 3), float)
        # the number of pairs
        if self.select1 is None:
            # Floor division keeps npair an integer (it is used as an array
            # size below); the product of consecutive integers is even.
            self.npair = (self.natom0*(self.natom0-1))//2
            self.pos1 = None
        else:
            self.natom1 = len(self.select1)
            self.pos1 = np.zeros((self.natom1, 3), float)
            self.npair = self.natom0*self.natom1
        # multiply the number of pairs by all images
        self.npair *= (1 + 2*self.nimage)**3
        # Prepare the output
        self.work = np.zeros(self.npair, float)
        AnalysisHook.init_first(self)
        if self.outg is not None:
            self.outg.create_dataset('rdf', (self.nbin,), float)
            self.outg['d'] = self.d
            if self.pairs_sr is not None:
                self.outg.create_dataset('rdf_sr', (self.nbin,), float)

    def read_online(self, st_pos, st_cell=None):
        '''Copy the selected positions (and the cell) from state items.'''
        if st_cell is not None:
            self._update_rvecs(st_cell.value)
        if self.select0 is None:
            self.pos0[:] = st_pos.value
        else:
            self.pos0[:] = st_pos.value[self.select0]
        if self.select1 is not None:
            self.pos1[:] = st_pos.value[self.select1]

    def read_offline(self, i, ds_pos, ds_cell=None):
        '''Read the selected positions (and the cell) of time step i.'''
        if ds_cell is not None:
            self._update_rvecs(np.array(ds_cell[i]))
        if self.select0 is None:
            ds_pos.read_direct(self.pos0, (i,))
        else:
            ds_pos.read_direct(self.pos0, (i, self.select0))
        if self.select1 is not None:
            ds_pos.read_direct(self.pos1, (i, self.select1))

    def compute_iteration(self):
        '''Add the histogram of the current positions to the running sums.'''
        self.cell.compute_distances(self.work, self.pos0, self.pos1,
                                    nimage=self.nimage)
        counts = np.histogram(self.work, bins=self.bins)[0]
        normalization = (self.npair/(self.cell.volume*(1+2*self.nimage)**3)*(4*np.pi*self.rspacing))*self.d**2
        self.rdf_sum += counts/normalization
        if self.pairs_sr is not None:
            # Only the first len(pairs_sr) elements of the work array are
            # used for the short-range pairs; the same normalization is
            # applied.
            self.cell.compute_distances(self.work[:len(self.pairs_sr)],
                                        self.pos0, self.pos1,
                                        pairs=self.pairs_sr, do_include=True)
            counts_sr = np.histogram(self.work[:len(self.pairs_sr)],
                                     bins=self.bins)[0]
            self.rdf_sum_sr += counts_sr/normalization
        self.nsample += 1

    def compute_derived(self):
        '''Average the accumulated histograms and store the result.'''
        # derive the RDF
        self.rdf = self.rdf_sum/self.nsample
        if self.pairs_sr is not None:
            self.rdf_sr = self.rdf_sum_sr/self.nsample
        # store everything in the h5py file
        if self.outg is not None:
            self.outg['rdf'][:] = self.rdf
            if self.pairs_sr is not None:
                self.outg['rdf_sr'][:] = self.rdf_sr

    def plot(self, fn_png='rdf.png'):
        '''Plot the RDF (and the short-range RDF, in red) to a PNG file.'''
        import matplotlib.pyplot as pt
        pt.clf()
        xunit = log.length.conversion
        pt.plot(self.d/xunit, self.rdf, 'k-', drawstyle='steps-mid')
        if self.pairs_sr is not None:
            pt.plot(self.d/xunit, self.rdf_sr, 'r-', drawstyle='steps-mid')
        pt.xlabel('Distance [%s]' % log.length.notation)
        pt.ylabel('RDF')
        pt.xlim(self.bins[0]/xunit, self.bins[-1]/xunit)
        pt.savefig(fn_png)
def _update_rvecs(self, rvecs):
    """Install a new cell built from ``rvecs`` and validate it for the RDF.

       **Arguments:**

       rvecs
            The cell vectors. They are passed unchanged to the ``Cell``
            constructor.

       The new cell is stored in ``self.cell`` before any check is made, so
       it remains assigned even when one of the checks below fails.

       A ``ValueError`` is raised when ``cell.nvec`` differs from 3, or when
       ``2*rcut`` is larger than at least one of the cell spacings multiplied
       by ``1 + 2*nimage``.
    """
    cell = Cell(rvecs)
    self.cell = cell

    if cell.nvec != 3:
        raise ValueError('RDF can only be computed for 3D periodic systems.')

    # Compare the cutoff diameter with every cell spacing, each scaled by the
    # number of periodic images taken into account.
    diameter = 2*self.rcut
    reach = cell.rspacings*(1 + 2*self.nimage)
    too_large = diameter > reach
    if too_large.any():
        raise ValueError('The 2*rcut argument should not exceed any of the cell spacings.')
def align_cell(self, lcs=None, swap=True):
    """Align the unit cell with respect to the Cartesian Axes frame

       **Optional Arguments:**

       lcs
            The linear combinations of the unit cell that must get aligned.
            This is a 2x3 array, where each row represents a linear
            combination of cell vectors. The first row is for alignment with
            the x-axis, second for the z-axis. The default value is::

                np.array([
                    [1, 0, 0],
                    [0, 0, 1],
                ])

       swap
            By default, the first alignment is done with the z-axis, then
            with the x-axis. The order is reversed when swap is set to
            False.

       The alignment of the first linear combination is always perfect. The
       alignment of the second linear combination is restricted to a plane.
       The cell is always made right-handed. The coordinates are also rotated
       with respect to the origin, but never inverted.

       When a linear combination is exactly anti-parallel to its target axis,
       the cross product of the two vectors vanishes and does not define a
       rotation axis. In that case a half turn is made about an axis that is
       orthogonal to the target, such that the alignment is still obtained.

       The attributes ``pos`` and ``cell`` of the system are modified
       in-place. Note that this method only works on 3D periodic systems; a
       ``TypeError`` is raised when the cell vectors do not form a 3x3
       array.
    """
    from molmod import Rotation

    # define the target
    target = np.array([
        [1, 0, 0],
        [0, 0, 1],
    ])

    # default value for linear combination
    if lcs is None:
        lcs = target.copy()

    # The starting values
    pos = self.pos
    rvecs = self.cell.rvecs.copy()
    if rvecs.shape != (3, 3):
        raise TypeError('The align_cell method only supports 3D periodic systems.')

    # Optionally swap a cell vector if the cell is not right-handed.
    if np.linalg.det(rvecs) < 0:
        # Find a reasonable vector to swap...
        index = rvecs.sum(axis=1).argmin()
        rvecs[index] *= -1

    # Define the source
    source = np.dot(lcs, rvecs)

    # Do the swapping
    if swap:
        target = target[::-1]
        source = source[::-1]

    # auxiliary function
    def get_angle_axis(t, s, fallback):
        '''Return the angle and axis of the rotation that brings s onto t.

           When s and t are exactly (anti-)parallel, their cross product is
           zero and cannot serve as rotation axis. The ``fallback`` axis is
           used instead and the angle is snapped to exactly 0 or pi, so that
           rounding errors in the arccos do not introduce a spurious tiny
           rotation.
        '''
        cos = np.dot(s, t)/np.linalg.norm(s)/np.linalg.norm(t)
        angle = np.arccos(np.clip(cos, -1, 1))
        axis = np.cross(s, t)
        if np.linalg.norm(axis) == 0:
            axis = fallback.astype(float)
            # A nan angle (degenerate zero-length s) also ends up as 0.0.
            angle = np.pi if angle > 0.5*np.pi else 0.0
        return angle, axis

    # first alignment
    # The fallback axis target[1] is orthogonal to target[0], so a half turn
    # about it flips an anti-parallel source[0] onto target[0].
    angle, axis = get_angle_axis(target[0], source[0], target[1])
    if angle != 0:
        r1 = Rotation.from_properties(angle, axis, False)
        pos = r1*pos
        rvecs = r1*rvecs
        source = r1*source

    # second alignment
    # Make sure the source is orthogonal to target[0]
    s1p = source[1] - target[0]*np.dot(target[0], source[1])
    # Rotating about target[0] (the fallback) never disturbs the first
    # alignment. With a zero angle this is just the identity rotation.
    angle, axis = get_angle_axis(target[1], s1p, target[0])
    r2 = Rotation.from_properties(angle, axis, False)
    pos = r2*pos
    rvecs = r2*rvecs

    # assign
    self.pos = pos
    self.cell = Cell(rvecs)
def __init__(self, numbers, pos, scopes=None, scope_ids=None, ffatypes=None,
             ffatype_ids=None, bonds=None, rvecs=None, charges=None,
             radii=None, valence_charges=None, dipoles=None, radii2=None,
             masses=None):
    r'''Set up a System from atomic numbers, positions and optional data.

       **Arguments:**

       numbers
            A one-dimensional numpy array with atomic numbers.

       pos
            A numpy array (N,3) with atomic coordinates in Bohr, where N is
            the length of ``numbers``.

       **Optional arguments:**

       scopes
            A list with scope names.

       scope_ids
            A list of scope indexes that links each atom with an element of
            the scopes list. When this argument is absent while scopes is
            given, scopes is assumed to hold one scope name per atom (a list
            of length natom). It is then converted automatically into a list
            of unique scope names and a matching scope_ids array.

       ffatypes
            A list of labels of the force field atom types.

       ffatype_ids
            A list of atom type indexes that links each atom with an element
            of the list ffatypes. When this argument is absent while
            ffatypes is given, ffatypes is assumed to hold one atom type per
            atom (a list of length natom). It is then converted
            automatically into a short list of unique atom types (within
            each scope) and a matching ffatype_ids array.

       bonds
            A numpy array (B,2) with atom indexes (counting starts from
            zero) to define the chemical bonds.

       rvecs
            An array whose rows are the unit cell vectors. At most three
            rows are allowed, each containing three Cartesian coordinates.

       charges
            An array of atomic charges.

       radii
            An array of atomic radii, :math:`R_{A,c}`, that determine shape
            of the atomic charge distribution:

            .. math::

                \rho_{A,c}(\mathbf{r}) = \frac{q_A}{\pi^{3/2}R_{A,c}^3} \exp\left(
                -\frac{|r - \mathbf{R}_A|^2}{R_{A,c}^2}
                \right)

       valence_charges
            In case a point-core + distributed valence charge is used, this
            vector contains the valence charges. The core charges can be
            computed by subtracting the valence charges from the net
            charges.

       dipoles
            An array of atomic dipoles.

       radii2
            An array of atomic radii, :math:`R_{A,d}`, that determine shape
            of the atomic dipole distribution:

            .. math::

               \rho_{A,d}(\mathbf{r}) = -2\frac{\mathbf{d}_A \cdot (\mathbf{r} - \mathbf{R}_A)}{
               \sqrt{\pi} R_{A,d}^5
               }\exp\left(
                -\frac{|r - \mathbf{R}_A|^2}{R_{A,d}^2}
                \right)

       masses
            The atomic masses (in atomic units, i.e. m_e).

       Several attributes are derived from the (optional) arguments:

       * ``cell`` is an instance of the ``Cell`` class, constructed from
         ``rvecs``.
       * ``neighs1``, ``neighs2`` and ``neighs3`` are dictionaries derived
         from ``bonds`` that contain atoms that are separated 1, 2 and 3
         bonds from a given atom, respectively. This means that
         ``i in system.neighs3[j]`` is ``True`` if there are three bonds
         between atoms i and j.

       A ``ValueError`` is raised when ``numbers`` is not one-dimensional or
       when the shape of ``pos`` is not ``(len(numbers), 3)``.
    '''
    # Validate the two mandatory arrays before anything is stored.
    if len(numbers.shape) != 1:
        raise ValueError('Argument numbers must be a one-dimensional array.')
    expected_pos_shape = (len(numbers), 3)
    if pos.shape != expected_pos_shape:
        raise ValueError('The pos array must have Nx3 rows. Mismatch with numbers argument with shape (N,).')

    # Atoms and their coordinates
    self.numbers = numbers
    self.pos = pos

    # Atom types and scopes; these may still be normalized by _init_derived.
    self.ffatypes = ffatypes
    self.ffatype_ids = ffatype_ids
    self.scopes = scopes
    self.scope_ids = scope_ids

    # Topology and periodicity
    self.bonds = bonds
    self.cell = Cell(rvecs)

    # Per-atom force-field parameters
    self.charges = charges
    self.radii = radii
    self.valence_charges = valence_charges
    self.dipoles = dipoles
    self.radii2 = radii2
    self.masses = masses

    with log.section('SYS'):
        # report some stuff
        self._init_log()
        # compute some derived attributes
        self._init_derived()
class System(object): def __init__(self, numbers, pos, scopes=None, scope_ids=None, ffatypes=None, ffatype_ids=None, bonds=None, rvecs=None, charges=None, radii=None, valence_charges=None, dipoles=None, radii2=None, masses=None): r'''Initialize a System object. **Arguments:** numbers A numpy array with atomic numbers pos A numpy array (N,3) with atomic coordinates in Bohr. **Optional arguments:** scopes A list with scope names scope_ids A list of scope indexes that links each atom with an element of the scopes list. If this argument is not present, while scopes is given, it is assumed that scopes contains a scope name for every atom, i.e. that it is a list with length natom. In that case, it will be converted automatically to a scopes list with only unique name together with a corresponding scope_ids array. ffatypes A list of labels of the force field atom types. ffatype_ids A list of atom type indexes that links each atom with an element of the list ffatypes. If this argument is not present, while ffatypes is given, it is assumed that ffatypes contains an atom type for every element, i.e. that it is a list with length natom. In that case, it will be converted automatically to a short ffatypes list with only unique elements (within each scope) together with a corresponding ffatype_ids array. bonds a numpy array (B,2) with atom indexes (counting starts from zero) to define the chemical bonds. rvecs An array whose rows are the unit cell vectors. At most three rows are allowed, each containing three Cartesian coordinates. charges An array of atomic charges radii An array of atomic radii, :math:`R_{A,c}`, that determine shape of the atomic charge distribution: .. math:: \rho_{A,c}(\mathbf{r}) = \frac{q_A}{\pi^{3/2}R_{A,c}^3} \exp\left( -\frac{|r - \mathbf{R}_A|^2}{R_{A,c}^2} \right) valence_charges In case a point-core + distribute valence charge is used, this vector contains the valence charges. 
The core charges can be computed by subtracting the valence charges from the net charges. dipoles An array of atomic dipoles radii2 An array of atomic radii, :math:`R_{A,d}`, that determine shape of the atomic dipole distribution: .. math:: \rho_{A,d}(\mathbf{r}) = -2\frac{\mathbf{d}_A \cdot (\mathbf{r} - \mathbf{R}_A)}{ \sqrt{\pi} R_{A,d}^5 }\exp\left( -\frac{|r - \mathbf{R}_A|^2}{R_{A,d}^2} \right) masses The atomic masses (in atomic units, i.e. m_e) Several attributes are derived from the (optional) arguments: * ``cell`` contains the rvecs attribute and is an instance of the ``Cell`` class. * ``neighs1``, ``neighs2`` and ``neighs3`` are dictionaries derived from ``bonds`` that contain atoms that are separated 1, 2 and 3 bonds from a given atom, respectively. This means that i in system.neighs3[j] is ``True`` if there are three bonds between atoms i and j. ''' if len(numbers.shape) != 1: raise ValueError( 'Argument numbers must be a one-dimensional array.') if pos.shape != (len(numbers), 3): raise ValueError( 'The pos array must have Nx3 rows. Mismatch with numbers argument with shape (N,).' 
) self.numbers = numbers self.pos = pos self.ffatypes = ffatypes self.ffatype_ids = ffatype_ids self.scopes = scopes self.scope_ids = scope_ids self.bonds = bonds self.cell = Cell(rvecs) self.charges = charges self.radii = radii self.valence_charges = valence_charges self.dipoles = dipoles self.radii2 = radii2 self.masses = masses with log.section('SYS'): # report some stuff self._init_log() # compute some derived attributes self._init_derived() def _init_log(self): if log.do_medium: log('Unit cell') log.hline() log('Number of periodic dimensions: %i' % self.cell.nvec) lengths, angles = self.cell.parameters names = 'abc' for i in range(len(lengths)): log('Cell parameter %5s: %10s' % (names[i], log.length(lengths[i]))) names = 'alpha', 'beta', 'gamma' for i in range(len(angles)): log('Cell parameter %5s: %10s' % (names[i], log.angle(angles[i]))) log.hline() log.blank() def _init_derived(self): if self.bonds is not None: self._init_derived_bonds() if self.scopes is not None: self._init_derived_scopes() elif self.scope_ids is not None: raise ValueError( 'The scope_ids only make sense when the scopes argument is given.' ) if self.ffatypes is not None: self._init_derived_ffatypes() elif self.ffatype_ids is not None: raise ValueError( 'The ffatype_ids only make sense when the ffatypes argument is given.' ) def _init_derived_bonds(self): # 1-bond neighbors self.neighs1 = dict((i, set([])) for i in range(self.natom)) for i0, i1 in self.bonds: self.neighs1[i0].add(i1) self.neighs1[i1].add(i0) # 2-bond neighbors self.neighs2 = dict((i, set([])) for i in range(self.natom)) for i0, n0 in self.neighs1.items(): for i1 in n0: for i2 in self.neighs1[i1]: # Require that there are no shorter paths than two bonds between # i0 and i2. Also avoid duplicates. 
if i2 > i0 and i2 not in self.neighs1[i0]: self.neighs2[i0].add(i2) self.neighs2[i2].add(i0) # 3-bond neighbors self.neighs3 = dict((i, set([])) for i in range(self.natom)) for i0, n0 in self.neighs1.items(): for i1 in n0: for i3 in self.neighs2[i1]: # Require that there are no shorter paths than three bonds # between i0 and i3. Also avoid duplicates. if i3 != i0 and i3 not in self.neighs1[ i0] and i3 not in self.neighs2[i0]: self.neighs3[i0].add(i3) self.neighs3[i3].add(i0) # 4-bond neighbors self.neighs4 = dict((i, set([])) for i in range(self.natom)) for i0, n0 in self.neighs1.items(): for i1 in n0: for i4 in self.neighs3[i1]: # Require that there are no shorter paths than three bonds # between i0 and i4. Also avoid duplicates. if i4 != i0 and i4 not in self.neighs1[ i0] and i4 not in self.neighs2[ i0] and i4 not in self.neighs3[i0]: self.neighs4[i0].add(i4) self.neighs4[i4].add(i0) # report some basic stuff on screen if log.do_medium: log('Analysis of the bonds:') bond_types = {} for i0, i1 in self.bonds: key = tuple(sorted([self.numbers[i0], self.numbers[i1]])) bond_types[key] = bond_types.get(key, 0) + 1 log.hline() log(' First Second Count') for (num0, num1), count in sorted(bond_types.items()): log('%6i %6i %5i' % (num0, num1, count)) log.hline() log.blank() log('Analysis of the neighbors:') log.hline() log('Number of first neighbors: %6i' % (sum(len(n) for n in self.neighs1.values()) // 2)) log('Number of second neighbors: %6i' % (sum(len(n) for n in self.neighs2.values()) // 2)) log('Number of third neighbors: %6i' % (sum(len(n) for n in self.neighs3.values()) // 2)) # Collect all types of 'environments' for each element. 
This is # useful to double check the bonds envs = {} for i0 in range(self.natom): num0 = self.numbers[i0] nnums = tuple( sorted(self.numbers[i1] for i1 in self.neighs1[i0])) key = (num0, nnums) envs[key] = envs.get(key, 0) + 1 # Print the environments on screen log.hline() log('Element Neighboring elements Count') for (num0, nnums), count in sorted(envs.items()): log('%7i %20s %5i' % (num0, ','.join(str(num1) for num1 in nnums), count)) log.hline() log.blank() def _init_derived_scopes(self): if self.scope_ids is None: if len(self.scopes) != self.natom: raise TypeError( 'When the scope_ids are derived automatically, the length of the scopes list must match the number of atoms.' ) lookup = {} scopes = [] self.scope_ids = np.zeros(self.natom, int) for i in range(self.natom): scope = self.scopes[i] scope_id = lookup.get(scope) if scope_id is None: scope_id = len(scopes) scopes.append(scope) lookup[scope] = scope_id self.scope_ids[i] = scope_id self.scopes = scopes for scope in self.scopes: check_name(scope) # check the range of the ids if self.scope_ids.min() != 0 or self.scope_ids.max() != len( self.scopes) - 1: raise ValueError('The ffatype_ids have incorrect bounds.') if log.do_medium: log('The following scopes are present in the system:') log.hline() log(' Scope ID Number of atoms') log.hline() for scope_id, scope in enumerate(self.scopes): log('%22s %3i %3i' % (scope, scope_id, (self.scope_ids == scope_id).sum())) log.hline() log.blank() def _init_derived_ffatypes(self): if self.ffatype_ids is None: if len(self.ffatypes) != self.natom: raise TypeError( 'When the ffatype_ids are derived automatically, the length of the ffatypes list must match the number of atoms.' 
) lookup = {} ffatypes = [] self.ffatype_ids = np.zeros(self.natom, int) for i in range(self.natom): if self.scope_ids is None: ffatype = self.ffatypes[i] key = ffatype, None else: scope_id = self.scope_ids[i] ffatype = self.ffatypes[i] key = ffatype, scope_id ffatype_id = lookup.get(key) if ffatype_id is None: ffatype_id = len(ffatypes) ffatypes.append(ffatype) lookup[key] = ffatype_id self.ffatype_ids[i] = ffatype_id self.ffatypes = ffatypes for ffatype in self.ffatypes: check_name(ffatype) # check the range of the ids if self.ffatype_ids.min() != 0 or self.ffatype_ids.max() != len( self.ffatypes) - 1: raise ValueError('The ffatype_ids have incorrect bounds.') # differentiate ffatype_ids if the same ffatype_id is used in different # scopes if self.scopes is not None: self.ffatype_id_to_scope_id = {} fixed_fids = {} for i in range(self.natom): fid = self.ffatype_ids[i] sid = self.ffatype_id_to_scope_id.get(fid) if sid is None: self.ffatype_id_to_scope_id[fid] = self.scope_ids[i] elif sid != self.scope_ids[i]: # We found the same ffatype_id in a different scope_id. This # must be fixed. First check if we have already a new # scope_id ready sid = self.scope_ids[i] new_fid = fixed_fids.get((sid, fid)) if new_fid is None: # No previous new fid create, do it now. new_fid = len(self.ffatypes) # Copy the ffatype label self.ffatypes.append(self.ffatypes[fid]) # Keep track of the new fid fixed_fids[(sid, fid)] = new_fid if log.do_warning: log.warn( 'Atoms with type ID %i in scope %s were changed to type ID %i.' 
% (fid, self.scopes[sid], new_fid)) # Apply the new fid self.ffatype_ids[i] = new_fid self.ffatype_id_to_scope_id[new_fid] = sid # Turn the ffatypes in the scopes into array if self.ffatypes is not None: self.ffatypes = np.array(self.ffatypes, copy=False) if self.scopes is not None: self.scopes = np.array(self.scopes, copy=False) # check the range of the ids if self.ffatype_ids.min() != 0 or self.ffatype_ids.max() != len( self.ffatypes) - 1: raise ValueError('The ffatype_ids have incorrect bounds.') if log.do_medium: log('The following atom types are present in the system:') log.hline() if self.scopes is None: log(' Atom type ID Number of atoms') log.hline() for ffatype_id, ffatype in enumerate(self.ffatypes): log('%22s %3i %3i' % (ffatype, ffatype_id, (self.ffatype_ids == ffatype_id).sum())) else: log(' Scope Atom type ID Number of atoms' ) log.hline() for ffatype_id, ffatype in enumerate(self.ffatypes): scope = self.scopes[ self.ffatype_id_to_scope_id[ffatype_id]] log('%22s %22s %3i %3i' % (scope, ffatype, ffatype_id, (self.ffatype_ids == ffatype_id).sum())) log.hline() log.blank() def _get_natom(self): """The number of atoms""" return len(self.pos) natom = property(_get_natom) def _get_nffatype(self): """The number of atom types""" return len(self.ffatypes) nffatype = property(_get_nffatype) def _get_nbond(self): '''The number of bonds''' if self.bonds is None: return 0 else: return len(self.bonds) nbond = property(_get_nbond) @classmethod def from_file(cls, *fns, **user_kwargs): """Construct a new System instance from one or more files **Arguments:** fn1, fn2, ... A list of filenames that are read in order. Information in later files overrides information in earlier files. **Optional arguments:** Any argument from the default constructor ``__init__``. These must be given with keywords. **Supported file formats** .xyz Standard Cartesian coordinates file (in angstroms). Atomic positions and atomic numbers are read from this file. 
If the title consists of 3, 6 or 9 numbers, each group of three numbers is interpreted as a cell vector (in angstroms). A guess of the bonds will be made based on inter-atomic distances. .psf Atom types and bonds are read from this file .chk Internal text-based checkpoint format. It just contains a dictionary with the constructor arguments. """ with log.section('SYS'): kwargs = {} for fn in fns: if fn.endswith('.xyz'): from molmod import Molecule mol = Molecule.from_file(fn) kwargs['numbers'] = mol.numbers.copy() kwargs['pos'] = mol.coordinates.copy() elif fn.endswith('.psf'): from molmod.io import PSFFile psf = PSFFile(fn) kwargs['ffatypes'] = psf.atom_types kwargs['bonds'] = np.array(psf.bonds, copy=False) kwargs['charges'] = np.array(psf.charges, copy=False) elif fn.endswith('.chk'): from molmod.io import load_chk allowed_keys = [ 'numbers', 'pos', 'scopes', 'scope_ids', 'ffatypes', 'ffatype_ids', 'bonds', 'rvecs', 'charges', 'radii', 'valence_charges', 'dipoles', 'radii2', 'masses', ] for key, value in load_chk(fn).items(): if key in allowed_keys: kwargs.update({key: value}) elif fn.endswith('.h5'): with h5.File(fn, 'r') as f: return cls.from_hdf5(f) else: raise IOError('Can not read from file \'%s\'.' % fn) if log.do_high: log('Read system parameters from %s.' % fn) kwargs.update(user_kwargs) return cls(**kwargs) @classmethod def from_hdf5(cls, f): '''Create a system from an HDF5 file/group containing a system group **Arguments:** f An open h5.File object with a system group. The system group must at least contain a numbers and pos dataset. ''' sgrp = f['system'] kwargs = { 'numbers': sgrp['numbers'][:], 'pos': sgrp['pos'][:], } for key in 'scope_ids', 'ffatype_ids', 'bonds', 'rvecs', 'charges', 'masses': if key in sgrp: kwargs[key] = sgrp[key][:] # String arrays have to be converted back to unicode... for key in 'scopes', 'ffatypes': if key in sgrp: kwargs[key] = np.asarray(sgrp[key][:], 'U22') if log.do_high: log('Read system parameters from %s.' 
% f.filename) return cls(**kwargs) def to_file(self, fn): """Write the system to a file **Arguments:** fn The file to write to. Supported formats are: chk Internal human-readable checkpoint format. This format includes all the information of a system object. All data are stored in atomic units. h5 Internal binary checkpoint format. This format includes all the information of a system object. All data are stored in atomic units. xyz A simple file with atomic positions and elements. Coordinates are written in Angstroms. """ if fn.endswith('.chk'): from molmod.io import dump_chk dump_chk( fn, { 'numbers': self.numbers, 'pos': self.pos, 'ffatypes': self.ffatypes, 'ffatype_ids': self.ffatype_ids, 'scopes': self.scopes, 'scope_ids': self.scope_ids, 'bonds': self.bonds, 'rvecs': self.cell.rvecs, 'charges': self.charges, 'radii': self.radii, 'valence_charges': self.valence_charges, 'dipoles': self.dipoles, 'radii2': self.radii2, 'masses': self.masses, }) elif fn.endswith('.h5'): with h5.File(fn, 'w') as f: self.to_hdf5(f) elif fn.endswith('.xyz'): from molmod.io import XYZWriter from molmod.periodic import periodic xyz_writer = XYZWriter(fn, [periodic[n].symbol for n in self.numbers]) xyz_writer.dump(str(self), self.pos) else: raise NotImplementedError( 'The extension of %s does not correspond to any known format.' % fn) if log.do_high: with log.section('SYS'): log('Wrote system to %s.' % fn) def to_hdf5(self, f): """Write the system to a HDF5 file. **Arguments:** f A Writable h5.File object. 
""" if 'system' in f: raise ValueError( 'The HDF5 file already contains a system description.') sgrp = f.create_group('system') sgrp.create_dataset('numbers', data=self.numbers) sgrp.create_dataset('pos', data=self.pos) if self.scopes is not None: sgrp.create_dataset('scopes', data=np.asarray(self.scopes, 'S22')) sgrp.create_dataset('scope_ids', data=self.scope_ids) if self.ffatypes is not None: sgrp.create_dataset('ffatypes', data=np.asarray(self.ffatypes, 'S22')) sgrp.create_dataset('ffatype_ids', data=self.ffatype_ids) if self.bonds is not None: sgrp.create_dataset('bonds', data=self.bonds) if self.cell.nvec > 0: sgrp.create_dataset('rvecs', data=self.cell.rvecs) if self.charges is not None: sgrp.create_dataset('charges', data=self.charges) if self.radii is not None: sgrp.create_dataset('radii', data=self.radii) if self.valence_charges is not None: sgrp.create_dataset('valence_charges', data=self.charges) if self.dipoles is not None: sgrp.create_dataset('dipoles', data=self.dipoles) if self.radii2 is not None: sgrp.create_dataset('radii2', data=self.radii2) if self.masses is not None: sgrp.create_dataset('masses', data=self.masses) def get_scope(self, index): """Return the of the scope (string) of atom with given index""" return self.scopes[self.scope_ids[index]] def get_ffatype(self, index): """Return the of the ffatype (string) of atom with given index""" return self.ffatypes[self.ffatype_ids[index]] def get_indexes(self, rule): """Return the atom indexes that match the filter ``rule`` ``rule`` can be a function that accepts two arguments: system and an atom index and that returns True of the atom with index i is of a given type. On the other hand ``rule`` can be an ATSELECT string that defines the atoms of interest. A list of atom indexes is returned. 
""" if isinstance(rule, str): rule = atsel_compile(rule) return np.array([i for i in range(self.natom) if rule(self, i)]) def iter_bonds(self): """Iterate over all bonds.""" if self.bonds is not None: for i1, i2 in self.bonds: yield i1, i2 def iter_angles(self): """Iterative over all possible valence angles. This routine is based on the attribute ``bonds``. """ if self.bonds is not None: for i1 in range(self.natom): for i0 in self.neighs1[i1]: for i2 in self.neighs1[i1]: if i0 > i2: yield i0, i1, i2 def iter_dihedrals(self): """Iterative over all possible dihedral angles. This routine is based on the attribute ``bonds``. """ if self.bonds is not None: for i1, i2 in self.bonds: for i0 in self.neighs1[i1]: if i0 == i2: continue for i3 in self.neighs1[i2]: if i1 == i3: continue if i0 == i3: continue yield i0, i1, i2, i3 def iter_oops(self): """Iterative over all possible oop patterns." This routine is based on the attribute ``bonds``. """ if self.bonds is not None: for i3 in range(self.natom): if len(self.neighs1[i3]) == 3: i0, i1, i2 = self.neighs1[i3] yield i0, i1, i2, i3 def detect_bonds(self, exceptions=None): """Initialize the ``bonds`` attribute based on inter-atomic distances **Optional argument:** exceptions: Specify custom threshold for certain pairs of elements. This must be a dictionary with ((num0, num1), threshold) as items. For each pair of elements, a distance threshold is used to detect bonded atoms. The distance threshold is based on a database of known bond lengths. If the database does not contain a record for the given element pair, the threshold is based on the sum of covalent radii. 
""" with log.section('SYS'): from molmod.bonds import bonds if self.bonds is not None: if log.do_warning: log.warn('Overwriting existing bonds.') work = np.zeros((self.natom * (self.natom - 1)) // 2, float) self.cell.compute_distances(work, self.pos) ishort = (work < bonds.max_length * 1.01).nonzero()[0] new_bonds = [] for i in ishort: i0, i1 = _unravel_triangular(i) n0 = self.numbers[i0] n1 = self.numbers[i1] if exceptions is not None: threshold = exceptions.get((n0, n1)) if threshold is None and n0 != n1: threshold = exceptions.get((n1, n0)) if threshold is not None: if work[i] < threshold: new_bonds.append([i0, i1]) continue if bonds.bonded(n0, n1, work[i]): new_bonds.append([i0, i1]) self.bonds = np.array(new_bonds) self._init_derived_bonds() def detect_ffatypes(self, rules): """Initialize the ``ffatypes`` attribute based on ATSELECT rules. **Argument:** rules A list of (ffatype, rule) pairs that will be used to initialize the attributes ``self.ffatypes`` and ``self.ffatype_ids``. If the system already has FF atom types, they will be overwritten. """ with log.section('SYS'): # Give warning if needed if self.ffatypes is not None: if log.do_warning: log.warn('Overwriting existing FF atom types.') # Compile all the rules my_rules = [] for ffatype, rule in rules: check_name(ffatype) if isinstance(rule, str): rule = atsel_compile(rule) my_rules.append((ffatype, rule)) # Use the rules to detect the atom types lookup = {} self.ffatypes = [] self.ffatype_ids = np.zeros(self.natom, int) for i in range(self.natom): my_ffatype = None for ffatype, rule in my_rules: if rule(self, i): my_ffatype = ffatype break if my_ffatype is None: raise ValueError( 'Could not detect FF atom type of atom %i.' % i) ffatype_id = lookup.get(my_ffatype) if ffatype_id is None: ffatype_id = len(lookup) self.ffatypes.append(my_ffatype) lookup[my_ffatype] = ffatype_id self.ffatype_ids[i] = ffatype_id # Make sure all is done well ... 
self._init_derived_ffatypes() def set_standard_masses(self): """Initialize the ``masses`` attribute based on the atomic numbers.""" with log.section('SYS'): from molmod.periodic import periodic if self.masses is not None: if log.do_warning: log.warn( 'Overwriting existing masses with default masses.') self.masses = np.array([periodic[n].mass for n in self.numbers]) def align_cell(self, lcs=None, swap=True): """Align the unit cell with respect to the Cartesian Axes frame **Optional Arguments:** lcs The linear combinations of the unit cell that must get aligned. This is a 2x3 array, where each row represents a linear combination of cell vectors. The first row is for alignment with the x-axis, second for the z-axis. The default value is:: np.array([ [1, 0, 0], [0, 0, 1], ]) swap By default, the first alignment is done with the z-axis, then with the x-axis. The order is reversed when swap is set to False. The alignment of the first linear combination is always perfect. The alignment of the second linear combination is restricted to a plane. The cell is always made right-handed. The coordinates are also rotated with respect to the origin, but never inverted. The attributes of the system are modified in-place. Note that this method only works on 3D periodic systems. """ from molmod import Rotation, deg # define the target target = np.array([ [1, 0, 0], [0, 0, 1], ]) # default value for linear combination if lcs is None: lcs = target.copy() # The starting values pos = self.pos rvecs = self.cell.rvecs.copy() if rvecs.shape != (3, 3): raise TypeError( 'The align_cell method only supports 3D periodic systems.') # Optionally swap a cell vector if the cell is not right-handed. if np.linalg.det(rvecs) < 0: # Find a reasonable vector to swap... 
index = rvecs.sum(axis=1).argmin() rvecs[index] *= -1 # Define the source source = np.dot(lcs, rvecs) # Do the swapping if swap: target = target[::-1] source = source[::-1] # auxiliary function def get_angle_axis(t, s): cos = np.dot(s, t) / np.linalg.norm(s) / np.linalg.norm(t) angle = np.arccos(np.clip(cos, -1, 1)) axis = np.cross(s, t) return angle, axis # first alignment angle, axis = get_angle_axis(target[0], source[0]) if np.linalg.norm(axis) > 0: r1 = Rotation.from_properties(angle, axis, False) pos = r1 * pos rvecs = r1 * rvecs source = r1 * source # second alignment # Make sure the source is orthogonal to target[0] s1p = source[1] - target[0] * np.dot(target[0], source[1]) angle, axis = get_angle_axis(target[1], s1p) r2 = Rotation.from_properties(angle, axis, False) pos = r2 * pos rvecs = r2 * rvecs # assign self.pos = pos self.cell = Cell(rvecs) def supercell(self, *reps): """Return a supercell of the system. **Arguments:** reps An array with repetitions, which must have the same number of elements as the number of cell vectors. If this method is called with a non-periodic system, a TypeError is raised. """ if self.cell.nvec == 0: raise TypeError( 'Can not create a supercell of a non-periodic system.') if self.cell.nvec != len(reps): raise TypeError( 'The number of repetitions must match the number of cell vectors.' 
) if not isinstance(reps, tuple): raise TypeError('The reps argument must be a tuple') # A dictionary with new arguments for the construction of the supercell new_args = {} # A) No repetitions if self.ffatypes is not None: new_args['ffatypes'] = self.ffatypes.copy() if self.scopes is not None: new_args['scopes'] = self.scopes.copy() # B) Simple repetitions rep_all = np.product(reps) for attrname in 'numbers', 'ffatype_ids', 'scope_ids', 'charges', \ 'radii', 'valence_charges', 'radii2', 'masses': value = getattr(self, attrname) if value is not None: new_args[attrname] = np.tile(value, rep_all) attrname = 'dipoles' value = getattr(self, attrname) if value is not None: new_args[attrname] = np.tile(value, (rep_all, 1)) # C) Cell vectors new_args['rvecs'] = self.cell.rvecs * np.array(reps)[:, None] # D) Atom positions new_pos = np.zeros((self.natom * rep_all, 3), float) start = 0 for iimage in np.ndindex(reps): stop = start + self.natom new_pos[start:stop] = self.pos + np.dot(iimage, self.cell.rvecs) start = stop new_args['pos'] = new_pos if self.bonds is not None: # E) Bonds # E.1) A function that translates a set of image indexes and an old atom # index into a new atom index offsets = {} start = 0 for iimage in np.ndindex(reps): offsets[iimage] = start start += self.natom def to_new_atom_index(iimage, i): return offsets[iimage] + i # E.2) Construct extended bond information: for each bond, also keep # track of periodic image it connects to. Note that this information # is implicit in yaff, and derived using the minimum image convention. rel_iimage = {} for ibond in range(len(self.bonds)): i0, i1 = self.bonds[ibond] delta = self.pos[i0] - self.pos[i1] frac = np.dot(self.cell.gvecs, delta) rel_iimage[ibond] = np.ceil(frac - 0.5) # E.3) Create the new bonds new_bonds = np.zeros((len(self.bonds) * rep_all, 2), int) counter = 0 for iimage0 in np.ndindex(reps): for ibond in range(len(self.bonds)): i0, i1 = self.bonds[ibond] # Translate i0 to the new index. 
j0 = to_new_atom_index(iimage0, i0) # Also translate i1 to the new index. This is a bit more tricky. # The difficult case occurs when the bond between i0 and i1 # connects different periodic images. In that case, the change # in periodic image must be taken into account. iimage1 = tuple( (iimage0[c] + rel_iimage[ibond][c]) % reps[c] for c in range(len(reps))) j1 = to_new_atom_index(iimage1, i1) new_bonds[counter, 0] = j0 new_bonds[counter, 1] = j1 counter += 1 new_args['bonds'] = new_bonds # Done return System(**new_args) def remove_duplicate(self, threshold=0.1): '''Return a system object in which the duplicate atoms and bonds are removed. **Optional argument:** threshold The minimum distance between two atoms that are supposed to be different. When it makes sense, properties of overlapping atoms are averaged out. In other cases, the atom with the lowest index in a cluster of overlapping atoms defines the new value of a property. ''' # compute distances ndist = (self.natom * (self.natom - 1)) // 2 if ndist == 0: # single atom systems, go home ... 
return dists = np.zeros(ndist) self.cell.compute_distances(dists, self.pos) # find clusters of overlapping atoms from molmod import ClusterFactory cf = ClusterFactory() counter = 0 for i0 in range(self.natom): for i1 in range(i0): if dists[counter] < threshold: cf.add_related(i0, i1) counter += 1 clusters = [c.items for c in cf.get_clusters()] # make a mapping from new to old atoms newold = {} oldnew = {} counter = 0 for cluster in clusters: # all merged atoms come first newold[counter] = sorted(cluster) for item in cluster: oldnew[item] = counter counter += 1 if len(clusters) > 0: old_reduced = set.union(*clusters) else: old_reduced = [] for item in range(self.natom): # all remaining atoms follow if item not in old_reduced: newold[counter] = [item] oldnew[item] = counter counter += 1 natom = len(newold) def reduce_int_array(old): if old is None: return None else: new = np.zeros(natom, old.dtype) for inew, iolds in newold.items(): new[inew] = old[iolds[0]] return new def reduce_float_array(old): if old is None: return None else: new = np.zeros(natom, old.dtype) for inew, iolds in newold.items(): new[inew] = old[iolds].mean() return new def reduce_float_matrix(old): '''Reduce array with dim=2''' if old is None: return None else: new = np.zeros((natom, np.shape(old)[1]), old.dtype) for inew, iolds in newold.items(): new[inew] = old[iolds].mean(axis=0) return new # trivial cases numbers = reduce_int_array(self.numbers) scope_ids = reduce_int_array(self.scope_ids) ffatype_ids = reduce_int_array(self.ffatype_ids) charges = reduce_float_array(self.charges) radii = reduce_float_array(self.radii) valence_charges = reduce_float_array(self.valence_charges) dipoles = reduce_float_matrix(self.dipoles) radii2 = reduce_float_array(self.radii2) masses = reduce_float_array(self.masses) # create averaged positions pos = np.zeros((natom, 3), float) for inew, iolds in newold.items(): # move to the same image oldposs = self.pos[iolds].copy() assert oldposs.ndim == 2 ref = oldposs[0] 
for oldpos in oldposs[1:]: delta = oldpos - ref self.cell.mic(delta) oldpos[:] = delta + ref # compute mean position pos[inew] = oldposs.mean(axis=0) # create reduced list of bonds if self.bonds is None: bonds = None else: bonds = set((oldnew[ia], oldnew[ib]) for ia, ib in self.bonds) bonds = np.array([bond for bond in bonds]) return self.__class__(numbers, pos, self.scopes, scope_ids, self.ffatypes, ffatype_ids, bonds, self.cell.rvecs, charges, radii, valence_charges, dipoles, radii2, masses) def subsystem(self, indexes): '''Return a System instance in which only the given atom are retained.''' def reduce_array(old): if old is None: return None else: new = np.zeros((len(indexes), ) + old.shape[1:], old.dtype) for inew, iold in enumerate(indexes): new[inew] = old[iold] return new def reduce_scopes(): if self.scopes is None: return None else: return [self.get_scope(i) for i in indexes] def reduce_ffatypes(): if self.ffatypes is None: return None else: return [self.get_ffatype(i) for i in indexes] def reduce_bonds(old): translation = dict( (iold, inew) for inew, iold in enumerate(indexes)) new = [] for old0, old1 in old: new0 = translation.get(old0) new1 = translation.get(old1) if not (new0 is None or new1 is None): new.append([new0, new1]) return new return System( numbers=reduce_array(self.numbers), pos=reduce_array(self.pos), scopes=reduce_scopes(), ffatypes=reduce_ffatypes(), bonds=reduce_bonds(self.bonds), rvecs=self.cell.rvecs, charges=reduce_array(self.charges), radii=reduce_array(self.radii), valence_charges=reduce_array(self.valence_charges), dipoles=reduce_array(self.dipoles), radii2=reduce_array(self.radii2), masses=reduce_array(self.masses), ) def cut_bonds(self, indexes): '''Remove all bonds of a fragment with the remainder of the system; **Arguments:** indexes The atom indexes in the fragment ''' new_bonds = [] indexes = set(indexes) for i0, i1 in self.bonds: if not ((i0 in indexes) ^ (i1 in indexes)): new_bonds.append([i0, i1]) self.bonds = 
np.array(new_bonds) def iter_matches(self, other, overlapping=True): """Yield all renumberings of atoms that map the given system on the current. Parameters ---------- other : yaff.System Another system with the same number of atoms (and chemical formula), or less atoms. overlapping : bool When set to False, the returned matches are guaranteed to be mutually exclusive. The result may not be unique when partially overlapping matches would exist. Use with care. The graph distance is used to perform the mapping, so bonds must be defined in the current and the given system. """ def make_graph_distance_matrix(system): """Return a bond graph distance matrix. Parameters ---------- system : System Molecule (with bonds) for which the graph distances must be computed. The graph distance is used for comparison because it allows the pattern matching to make optimal choices of which pairs of atoms to compare next, i.e. both bonded or nearby the last matched pair. """ from molmod.graphs import Graph return Graph(system.bonds, system.natom).distances def error_sq_fn(x, y): """Compare bonded versus not bonded, rather than the full graph distance. Parameters ---------- x, y: int Graph distances from self and other, respectively. Graph distances are not completely transferable between self and other, i.e. a shorter path may exist between two atoms in the big system (self) that is not present in a fragment (other). Hence, only the absence or presence of a direct bond must be compared. """ return (min(x - 1, 1) - min(y - 1, 1))**2 with log.section('SYS'): log('Generating allowed indexes for renumbering.') # The allowed permutations is just based on the chemical elements, not the atom # types, which could also be useful. 
allowed = [] if self.ffatypes is None or other.ffatypes is None: for number1 in other.numbers: allowed.append((self.numbers == number1).nonzero()[0]) else: # Only continue if other.ffatypes is a subset of self.ffatypes if not (set(self.ffatypes) >= set(other.ffatypes)): return ffatype_ids0 = self.ffatype_ids ffatypes0 = list(self.ffatypes) order = np.array( [ffatypes0.index(ffatype) for ffatype in other.ffatypes]) ffatype_ids1 = order[other.ffatype_ids] for ffatype_id1 in ffatype_ids1: allowed.append((ffatype_ids0 == ffatype_id1).nonzero()[0]) log('Building distance matrix for self.') dm0 = make_graph_distance_matrix(self) log('Building distance matrix for other.') dm1 = make_graph_distance_matrix(other) # Yield the solutions log('Generating renumberings.') for match in iter_matches(dm0, dm1, allowed, 1e-3, error_sq_fn, overlapping): yield match
def from_files(cls, guest, parameters, **kwargs):
    """Automated setup of GCMC simulation

       **Arguments:**

       guest
            Two types are accepted: (i) the filename of a system file
            describing one guest molecule, (ii) a System instance of
            one guest molecule

       parameters
            Force-field parameters describing guest-guest and optionally
            host-guest interaction.
            Three types are accepted: (i) the filename of the parameter
            file, which is a text file that adheres to YAFF parameter
            format, (ii) a list of such filenames, or (iii) an instance of
            the Parameters class.

       **Optional arguments:**

       hooks
            A list of MCHooks

       host
            Two types are accepted: (i) the filename of a system file
            describing the host system, (ii) a System instance of the host

       nguests
            A rough guess for the number of adsorbed guests (default 10),
            handed over to the constructor.

       The ``nlow`` and ``nhigh`` keyword arguments are discarded when
       given, because they are controlled by this method. All other keyword
       arguments are forwarded to ``ForceField.generate``. See the
       constructor of the :class:`yaff.pes.generator.FFArgs` class for the
       available optional arguments.

       When a host is given and the guest has no cell or a cell without
       cell vectors, the cell of the guest is replaced in-place by one
       with the cell vectors of the host.
    """
    # The guest may be given as a filename; a System is needed from here on.
    if isinstance(guest, str):
        guest = System.from_file(guest)
    assert isinstance(guest, System)

    # nlow and nhigh are set by this method, so user-provided values are
    # dropped from the optional arguments.
    for reserved in ('nlow', 'nhigh'):
        kwargs.pop(reserved, None)
    # Pick out the options that are not meant for the force field.
    nguests = kwargs.pop('nguests', 10)
    host = kwargs.pop('host', None)
    hooks = kwargs.pop('hooks', [])
    # Efficient treatment of reciprocal ewald contribution, unless the
    # caller made an explicit choice.
    kwargs.setdefault('reci_ei', 'ewald_interaction')

    external_potential = None
    if host is not None:
        if isinstance(host, str):
            host = System.from_file(host)
        assert isinstance(host, System)
        # An isolated guest molecule is put in the same periodic box as
        # the host.
        guest_is_isolated = guest.cell is None or guest.cell.nvec == 0
        if guest_is_isolated:
            guest.cell = Cell(host.cell.rvecs)
        # Complex of the host and one guest, with a force field that
        # excludes the host-host interactions.
        nhost = host.natom
        external_potential = ForceField.generate(
            host.merge(guest), parameters, nlow=nhost, nhigh=nhost,
            **kwargs)

    # NOTE: a check that compared the energy of the isolated guest with
    # that of the guest in its periodic box used to live here, but it is
    # disabled. Its warning stated that the interaction of a guest with its
    # periodic images is NOT taken into account in this simulation and
    # suggested a supercell when that energy is large compared to k_bT.

    def make_guest_ff(system, guest):
        # Guest-guest force field that excludes the interactions between
        # the first N-1 guests. By making use of nlow=nhigh, intramolecular
        # energies are automatically discarded.
        nprevious = max(0, system.natom - guest.natom)
        return ForceField.generate(system, parameters, nlow=nprevious,
                                   nhigh=nprevious, **kwargs)

    return cls(guest, make_guest_ff, external_potential=external_potential,
               eguest=0.0, hooks=hooks, nguests=nguests)