def load_pdb(filename):
    """Load a single molecule from a pdb file.

    Only a small fragment of the pdb specification is supported: lines that
    start with ``ATOM`` are parsed by fixed column positions, all other lines
    are ignored. It is assumed that there is only one molecular geometry in
    the pdb file.

    Argument:
      filename -- The path of the pdb file to read

    Returns a Molecule whose coordinates were multiplied by ``angstrom``. Two
    extra attributes are attached: ``occupancies`` (columns 55-60 of every
    ATOM line) and ``betas`` (columns 61-66), both as numpy arrays.

    Raises FileFormatError when the file contains no ATOM lines.
    """
    numbers = []
    coordinates = []
    occupancies = []
    betas = []
    # The context manager closes the file even when a malformed line makes
    # one of the conversions below raise.
    with open(filename) as f:
        for line in f:
            if line.startswith("ATOM"):
                # The element symbol sits in columns 77-78.
                symbol = line[76:78].strip()
                numbers.append(periodic[symbol].number)
                coordinates.append([
                    float(line[30:38])*angstrom,
                    float(line[38:46])*angstrom,
                    float(line[46:54])*angstrom,
                ])
                occupancies.append(float(line[54:60]))
                betas.append(float(line[60:66]))
    if len(numbers) > 0:
        molecule = Molecule(numbers, coordinates)
        molecule.occupancies = numpy.array(occupancies)
        molecule.betas = numpy.array(betas)
        return molecule
    else:
        raise FileFormatError("No molecule found in pdb file %s" % filename)
def get_molecules(self):
    """Load the four reference xyz files from the ``input`` directory.

    Returns a list with the molecules in the order tpa, tea, water,
    cyclopentane.
    """
    paths = [
        "input/tpa.xyz",
        "input/tea.xyz",
        "input/water.xyz",
        "input/cyclopentane.xyz",
    ]
    return [Molecule.from_file(path) for path in paths]
def convert_molecule_to_molmod(molecule):
    """Build a Molecule from the fields of ``molecule`` and give it a graph.

    The numbers, coordinates, label and symbols of the argument are passed to
    the Molecule constructor, after which ``set_default_graph`` is called on
    the new object.

    Returns the new Molecule.
    """
    converted = Molecule(
        molecule.numbers,
        molecule.coordinates,
        molecule.label,
        symbols=molecule.symbols,
    )
    converted.set_default_graph()
    return converted
def load_pdb(filename):
    """Load a single molecule from a pdb file.

    Only a small fragment of the pdb specification is supported: lines that
    start with ``ATOM`` are parsed by fixed column positions, all other lines
    are ignored. It is assumed that there is only one molecular geometry in
    the pdb file.

    Argument:
      filename -- The path of the pdb file to read

    Returns a Molecule with the coordinates exactly as written in the file
    (no unit conversion is applied here). Two extra attributes are attached:
    ``occupancies`` (columns 55-60 of every ATOM line) and ``betas``
    (columns 61-66), both as numpy arrays.

    Raises IOError when the file contains no ATOM lines.
    """
    numbers = []
    coordinates = []
    occupancies = []
    betas = []
    # The context manager closes the file even when a malformed line makes
    # one of the conversions below raise.
    with open(filename) as f:
        for line in f:
            if line.startswith("ATOM"):
                # The element symbol sits in columns 77-78.
                symbol = line[76:78].strip()
                numbers.append(periodic[symbol].number)
                coordinates.append([
                    float(line[30:38]),
                    float(line[38:46]),
                    float(line[46:54]),
                ])
                occupancies.append(float(line[54:60]))
                betas.append(float(line[60:66]))
    if len(numbers) > 0:
        molecule = Molecule(numbers, coordinates)
        molecule.occupancies = numpy.array(occupancies)
        molecule.betas = numpy.array(betas)
        return molecule
    else:
        raise IOError("No molecule found in pdb file %s" % filename)
def test_one(xyz_fn, checks, reorder=False):
    """Round-trip one xyz file through the z-matrix conversion.

    Arguments:
      xyz_fn -- The xyz file with the reference geometry
      checks -- An iterable of (field, index, value) tuples; each z-matrix
                entry ``zmat[field][index]`` must match ``value`` up to two
                decimal places
      reorder -- When False, the generator for the original geometry must
                 leave the atom order untouched

    The geometry is converted to a z-matrix and back. The result is written
    to the output directory, converted a second time, and both Cartesian
    reconstructions are compared.
    """
    def check_identity_order(generator, size):
        # Both index arrays must be the identity permutation.
        self.assertArraysEqual(generator.new_index, numpy.arange(size))
        self.assertArraysEqual(generator.old_index, numpy.arange(size))

    def check_entries(zmat):
        for field, index, value in checks:
            self.assertAlmostEqual(
                zmat[field][index], value, 2,
                "%s:%i %f!=%f" % (field, index, zmat[field][index], value))

    # first pass: the geometry from the file
    mol = XYZFile(xyz_fn).get_molecule()
    zmat_gen = ZMatrixGenerator(MolecularGraph.from_geometry(mol))
    if reorder is False:
        check_identity_order(zmat_gen, mol.size)
    zmat0 = zmat_gen.cart_to_zmat(mol.coordinates)
    check_entries(zmat0)
    numbers0, coordinates0 = zmat_to_cart(zmat0)

    # second pass: the geometry rebuilt from the first z-matrix
    mol0 = Molecule(numbers0, coordinates0)
    mol0.write_to_file("output/zmat_%s" % os.path.basename(xyz_fn))
    zmat_gen0 = ZMatrixGenerator(MolecularGraph.from_geometry(mol0))
    check_identity_order(zmat_gen0, mol.size)
    zmat1 = zmat_gen0.cart_to_zmat(mol0.coordinates)
    check_entries(zmat1)
    numbers1, coordinates1 = zmat_to_cart(zmat1)

    self.assertArraysEqual(numbers0, numbers1)
    self.assertArraysAlmostEqual(coordinates0, coordinates1, 1e-5)
def test_many_separate(self):
    """Write a PSF file that holds ethene twice followed by tea."""
    psf = PSFFile()
    ethene = Molecule.from_file("input/ethene.xyz")
    for _ in range(2):
        psf.add_molecule(ethene)
    tea = Molecule.from_file("input/tea.xyz")
    psf.add_molecule(tea)
    psf.write_to_file("output/many_separate.psf")
def create_molecule(selected_nodes, parent=None):
    """Build a Molecule from the atoms found in the selected nodes.

    Arguments:
      selected_nodes -- The nodes handed to ``yield_atoms``
      parent -- When given, atom positions are taken relative to this node;
                otherwise the absolute frame of each atom is used

    Returns a Molecule with an extra ``atoms`` attribute: the list of atoms
    in the same order as the numbers and coordinates.
    """
    atoms = list(yield_atoms(selected_nodes))

    # Choose the way positions are obtained once, outside the loop.
    if parent is None:
        def position_of(atom):
            return atom.get_absolute_frame().t
    else:
        def position_of(atom):
            return atom.get_frame_relative_to(parent).t

    numbers = []
    coordinates = []
    for atom in atoms:
        numbers.append(atom.number)
        coordinates.append(position_of(atom))

    result = Molecule(numbers, coordinates)
    result.atoms = atoms
    return result
def test_zirconium(self):
    """Check the edge count of the default graph for four input files."""
    cases = [
        ('input/oh3siozroh3.xyz', 14),
        ('input/osih2osihosih2osih2ozrh2oh.xyz', 21),
        ('input/h3zrosihohosioh3.xyz', 16),
        ('input/zr4o8-3.xyz', 12),
    ]
    for path, num_edges in cases:
        mol = Molecule.from_file(path)
        mol.set_default_graph()
        assert len(mol.graph.edges)==num_edges
def test_consistency(self):
    """Check that molecules survive a dump_cml/load_cml round trip.

    Two molecules get a default graph, are written to one cml file and read
    back. Title, numbers, coordinates, vertex count and edges must be
    identical afterwards.
    """
    molecules = [
        Molecule.from_file("input/cyclopentane.xyz"),
        Molecule.from_file("input/funny.xyz"),
    ]
    for m in molecules:
        m.set_default_graph()

    dump_cml("output/tmp.cml", molecules)
    check = load_cml("output/tmp.cml")

    # zip() stops at the shortest sequence, so a loader that drops molecules
    # would otherwise go unnoticed.
    self.assertEqual(len(molecules), len(check))
    for m1, m2 in zip(molecules, check):
        self.assertEqual(m1.title, m2.title)
        self.assertTrue((m1.numbers==m2.numbers).all())
        self.assertTrue((m1.coordinates==m2.coordinates).all())
        self.assertEqual(m1.graph.num_vertices, m2.graph.num_vertices)
        self.assertEqual(set(m1.graph.edges), set(m2.graph.edges))
def test_fingerprint_collisions(self):
    """Check that known colliding graph pairs now get distinct fingerprints.

    The test is currently skipped unconditionally; the code after the raise
    is kept for when it is enabled again.
    """
    raise SkipTest
    # These are collisions with older versions, found by scanning the
    # pubchem database.
    cases = [
        ('SID_55127927.sdf', 'SID_56274343.sdf'),
        ('SID_55488598.sdf', 'SID_54258192.sdf'),
        ('SID_41893280.sdf', 'SID_41893278.sdf'),
        ('SID_40363570.sdf', 'SID_40363571.sdf'),
        ('SID_31646548.sdf', 'SID_31646545.sdf'),
    ]
    for pair in cases:
        g0, g1 = [
            Molecule.from_file(os.path.join("input", fn)).graph
            for fn in pair
        ]
        self.assertNotEqual(str(g0.fingerprint.data), str(g1.fingerprint.data))
def tune_geometry(graph, mol):
    """Fine tune a molecular geometry, starting from a (very) poor guess.

    Do not expect all details to be in perfect condition. A subsequent
    optimization with a more accurate level of theory is at least advisable.

    Arguments:
      graph -- The molecular graph of the system
      mol -- The initial guess of the coordinates

    Returns a new Molecule with the numbers of the graph and the tuned
    coordinates.
    """
    N = len(graph.numbers)
    from molmod.minimizer import Minimizer, NewtonGLineSearch

    ff = ToyFF(graph)

    def relax(start, threshold0, threshold1):
        # One minimizer run on the current force field settings.
        run = Minimizer(start, ff, NewtonGLineSearch, threshold0, threshold1,
                        2*N, 500, 50, do_gradient=True, verbose=False)
        return run.x

    x = mol.coordinates.ravel()

    # level 3 geometry optimization: bond lengths + pauli
    ff.dm_reci = 0.2
    ff.bond_quad = 1.0
    x = relax(x, 1e-3, 1e-3)

    # level 4 geometry optimization: bond lengths + bending angles + pauli
    ff.bond_quad = 0.0
    ff.bond_hyper = 1.0
    ff.span_quad = 1.0
    x = relax(x, 1e-6, 1e-6)

    return Molecule(graph.numbers, x.reshape((N,3)))
def get_molecule(self, index=0):
    """Return one frame of the trajectory as a Molecule

       Optional argument:
        | ``index``  --  The frame index [default=0]

       The molecule gets the numbers and symbols of the trajectory and the
       geometry and title stored at the given index.
    """
    numbers = self.numbers
    coordinates = self.geometries[index]
    title = self.titles[index]
    return Molecule(numbers, coordinates, title, symbols=self.symbols)
def test_tetra(self):
    """Check the bond and bend counts of the tetra molecule and dump it.

    After adding the molecule to a fresh PSF file, four bonds and six bends
    are expected. The file is then written to the output directory.
    """
    molecule = Molecule.from_file("input/tetra.xyz")
    psf = PSFFile()
    psf.add_molecule(molecule)
    # assertEqual reports both values on failure and does not rely on the
    # assert_ alias, which newer unittest versions no longer provide.
    self.assertEqual(psf.bonds.shape[0], 4)
    self.assertEqual(psf.bends.shape[0], 6)
    psf.write_to_file("output/tetra.psf")
def setup(filename="ethane.pdb"):
    """Load a molecule from ``filename`` and give it default graph and symbols.

    Returns the prepared molecule.
    """
    molecule = Molecule.from_file(filename)
    # setup stuff
    molecule.set_default_graph()
    molecule.set_default_symbols()
    return molecule
def endElement(self, name):
    """Finish a ``molecule`` element: build the Molecule and store it.

    When atoms were collected, the coordinates are multiplied by
    ``angstrom``, the extra fields are attached, and the bonds are turned
    into a MolecularGraph (or None when there are no usable bonds). The
    result is appended to ``self.molecules``. The current title is reset
    at the end of every molecule element.
    """
    if name == 'molecule':
        if len(self.current_numbers) > 0:
            self.current_coordinates = np.array(self.current_coordinates)*angstrom
            molecule = Molecule(self.current_numbers, self.current_coordinates, self.current_title)
            molecule.extra = self.current_extra
            molecule.atoms_extra = self.current_atoms_extra

            # Map every atom name to its position in the atom list.
            name_to_index = {
                atom_name: position
                for position, atom_name in enumerate(self.current_atom_names)
            }

            edges = set()
            bonds_extra = {}
            for name1, name2, extra in self.current_bonds:
                i1 = name_to_index.get(name1)
                i2 = name_to_index.get(name2)
                if i1 is None or i2 is None:
                    # Bonds that refer to unknown atom names are dropped.
                    continue
                edge = frozenset([i1, i2])
                if len(extra) > 0:
                    bonds_extra[edge] = extra
                edges.add(edge)

            molecule.bonds_extra = bonds_extra
            molecule.graph = (
                None if len(edges) == 0
                else MolecularGraph(edges, self.current_numbers)
            )
            del self.current_atom_names
            del self.current_bonds
            self.molecules.append(molecule)
        self.current_title = None
def get_optimized_molecule(self):
    """Return a Molecule with the last optimization geometry, or None.

    None is returned when ``get_optimization_coordinates`` yields nothing.
    """
    opt_coor = self.get_optimization_coordinates()
    if len(opt_coor) > 0:
        return Molecule(self.molecule.numbers, opt_coor[-1])
    return None
def get_first_molecule(self):
    """Return the first frame as a Molecule.

    Raises Error when the stored first frame is no longer available.
    """
    first = self._first
    if first is None:
        raise Error(
            "get_first_molecule must be called before the first iteration."
        )
    title, coordinates = first
    return Molecule(self.numbers, coordinates, title)
def get_first_molecule(self):
    """Return the first molecule of the trajectory

       This can be useful to configure your program before handling the
       actual trajectory. The molecule is built from the stored first
       (title, coordinates) pair together with the numbers and symbols of
       the reader.
    """
    title, coordinates = self._first
    return Molecule(self.numbers, coordinates, title, symbols=self.symbols)
def get_optimized_molecule(self):
    """Return a molecule object of the optimal geometry

       The last entry of the optimization coordinates is used. When there
       are no optimization coordinates, None is returned.
    """
    opt_coor = self.get_optimization_coordinates()
    if len(opt_coor) > 0:
        return Molecule(self.molecule.numbers, opt_coor[-1])
    return None
def _analyze(self):
    """Set ``self.molecule`` when the required fields are present.

    Nothing happens unless both the atomic numbers and the current
    Cartesian coordinates are available in ``self.fields``.
    """
    fields = self.fields
    if "Atomic numbers" not in fields:
        return
    if "Current cartesian coordinates" not in fields:
        return
    numbers = self.fields["Atomic numbers"]
    # The coordinates are stored flat; reshape them to one row per atom.
    coordinates = numpy.reshape(self.fields["Current cartesian coordinates"], (-1, 3))
    self.molecule = Molecule(numbers, coordinates, self.title)
def _analyze(self):
    """Convert a few elementary fields into a molecule object

       ``self.molecule`` is only set when both the atomic numbers and the
       current Cartesian coordinates are present in ``self.fields``.
    """
    fields = self.fields
    if "Atomic numbers" not in fields:
        return
    if "Current cartesian coordinates" not in fields:
        return
    numbers = self.fields["Atomic numbers"]
    # The coordinates are stored flat; reshape them to one row per atom.
    coordinates = np.reshape(self.fields["Current cartesian coordinates"], (-1, 3))
    self.molecule = Molecule(numbers, coordinates, self.title)
def test_improper(self):
    """Check the impropers of formol and their write/read round trip.

    Three impropers of four atoms each are expected, and the first two
    columns must contain the pairs (0,1), (0,2) and (0,3). The PSF file is
    then written and loaded again, and the impropers must be unchanged.
    """
    molecule = Molecule.from_file("input/formol.xyz")
    psf = PSFFile()
    psf.add_molecule(molecule)
    self.assertEqual(psf.impropers.shape, (3,4))

    test_block = set([(row[0], row[1]) for row in psf.impropers])
    # assertTrue replaces the assert_ alias, which newer unittest versions
    # no longer provide.
    for pair in [(0,1), (0,2), (0,3)]:
        self.assertTrue(pair in test_block)

    psf.write_to_file("output/tmp_impropers.psf")
    psf2 = PSFFile("output/tmp_impropers.psf")
    self.assertArraysEqual(psf.impropers, psf2.impropers)
def endElement(self, name):
    """Finish a ``molecule`` element: build the Molecule and store it.

    When atoms were collected, the coordinates are multiplied by
    ``angstrom``, the extra fields are attached, and the bonds are turned
    into a MolecularGraph (or None when there are no usable bonds). The
    result is appended to ``self.molecules``. The current title is reset
    at the end of every molecule element.
    """
    if name == 'molecule':
        if len(self.current_numbers) > 0:
            self.current_coordinates = numpy.array(self.current_coordinates)*angstrom
            molecule = Molecule(self.current_numbers, self.current_coordinates, self.current_title)
            molecule.extra = self.current_extra
            molecule.atoms_extra = self.current_atoms_extra

            # Map every atom name to its position in the atom list.
            name_to_index = dict(
                (atom_name, position)
                for position, atom_name in enumerate(self.current_atom_names)
            )

            pairs = set()
            bonds_extra = {}
            for name1, name2, extra in self.current_bonds:
                i1 = name_to_index.get(name1)
                i2 = name_to_index.get(name2)
                if i1 is None or i2 is None:
                    # Bonds that refer to unknown atom names are dropped.
                    continue
                pair = frozenset([i1,i2])
                if len(extra) > 0:
                    bonds_extra[pair] = extra
                pairs.add(pair)

            molecule.bonds_extra = bonds_extra
            if len(pairs) > 0:
                molecule.graph = MolecularGraph(pairs, self.current_numbers)
            else:
                molecule.graph = None
            del self.current_atom_names
            del self.current_bonds
            self.molecules.append(molecule)
        self.current_title = None
def test_example_periodic(self):
    """Tune the caplayer geometry in a cell that is periodic in two directions.

    The tuned structure is written to the output directory.
    """
    mol = Molecule.from_file("input/caplayer.cml")

    cell_vectors = numpy.array([
        [14.218, 7.109, 0.0],
        [ 0.0 , 12.313, 0.0],
        [ 0.0 , 0.0 , 10.0],
    ])*angstrom
    active = numpy.array([True, True, False])
    unit_cell = UnitCell(cell_vectors, active)

    # The shifted distance matrix is computed but not used further below.
    distances = mol.distance_matrix
    distances = distances + distances.max()*numpy.identity(len(distances))

    mol = tune_geometry(mol.graph, mol, unit_cell)
    mol.write_to_file("output/caplayer.xyz")
def guess_geometry(graph, unitcell_active, unitcell, unitcell_reciproke):
    """Construct a molecular geometry based on a molecular graph.

    This routine does not require initial coordinates and will give a very
    rough picture of the initial geometry. Do not expect all details to be
    in perfect condition. A subsequent optimization with a more accurate
    level of theory is at least advisable.

    Arguments:
      graph -- The molecular graph of the system
      unitcell_active, unitcell, unitcell_reciproke -- passed on unchanged
          to the ToyFF constructor

    Returns a Molecule with the numbers of the graph and the guessed
    coordinates. The starting point is random, so results differ per call.
    """
    N = len(graph.numbers)
    from molmod.minimizer import Minimizer, NewtonGLineSearch

    ff = ToyFF(graph, unitcell_active, unitcell, unitcell_reciproke)

    def relax(start, threshold0, threshold1):
        # One minimizer run on the current force field settings.
        run = Minimizer(start, ff, NewtonGLineSearch, threshold0, threshold1,
                        2*N, 500, 50, do_gradient=True, verbose=False)
        return run.x

    x = numpy.random.normal(0,1,N*3)

    # level 1 geometry optimization: graph based
    ff.dm_quad = 1.0
    x = relax(x, 1e-10, 1e-8)

    # level 2 geometry optimization: graph based + pauli repulsion
    ff.dm_quad = 1.0
    ff.dm_reci = 1.0
    x = relax(x, 1e-10, 1e-8)

    # Add a little noise to avoid saddle points
    x += numpy.random.uniform(-0.01, 0.01, len(x))

    # level 3 geometry optimization: bond lengths + pauli
    ff.dm_quad = 0.0
    ff.dm_reci = 0.2
    ff.bond_quad = 1.0
    x = relax(x, 1e-3, 1e-3)

    # level 4 geometry optimization: bond lengths + bending angles + pauli
    ff.bond_quad = 0.0
    ff.bond_hyper = 1.0
    ff.span_quad = 1.0
    x = relax(x, 1e-6, 1e-6)

    return Molecule(graph.numbers, x.reshape((N,3)))
def collect_molecules(self, parent, universe=None):
    """Recursively turn the atoms and bonds below ``parent`` into molecules.

    Arguments:
      parent -- The node whose direct Atom and Bond children form one
                molecule; Frame children are visited recursively
      universe -- The node relative to which atom positions are expressed;
                  defaults to the ``parent`` of the outermost call

    Returns a list of Molecule objects: at most one for ``parent`` itself
    (only when it has Atom children), followed by those found in its Frame
    children. Each molecule carries ``extra``, ``atoms_extra``,
    ``bonds_extra`` and ``graph`` (None when there are no bonds).
    """
    Atom = context.application.plugins.get_node("Atom")
    Bond = context.application.plugins.get_node("Bond")
    Frame = context.application.plugins.get_node("Frame")
    # Identity test: an equality test could be hijacked by a custom __eq__
    # on the node class.
    if universe is None:
        universe = parent

    atom_to_index = {}
    atoms_extra = {}
    counter = 0
    numbers = []
    coordinates = []
    for child in parent.children:
        if isinstance(child, Atom):
            atom_to_index[child] = counter
            if len(child.extra) > 0:
                atoms_extra[counter] = child.extra
            counter += 1
            numbers.append(child.number)
            coordinates.append(child.get_frame_relative_to(universe).t)

    if len(numbers) > 0:
        molecule = Molecule(numbers, coordinates, parent.name)
        molecule.extra = parent.extra
        molecule.atoms_extra = atoms_extra
        molecule.bonds_extra = {}

        pairs = set()
        for child in parent.children:
            if isinstance(child, Bond):
                atoms = child.get_targets()
                pair = frozenset([atom_to_index[atoms[0]], atom_to_index[atoms[1]]])
                if len(child.extra) > 0:
                    molecule.bonds_extra[pair] = child.extra
                pairs.add(pair)
        if len(pairs) > 0:
            molecule.graph = MolecularGraph(pairs, molecule.numbers)
        else:
            molecule.graph = None
        result = [molecule]
    else:
        result = []

    for child in parent.children:
        if isinstance(child, Frame):
            result.extend(self.collect_molecules(child, universe))
    return result
def read_cube_header(f):
    """Read the header of a cube file from the open file object ``f``.

    The header consists of two text lines, one line with the number of
    atoms and the grid origin, three lines with a grid size and a step
    vector each, and one line per atom.

    Returns a tuple (molecule, origin, axes, nrep, subtitle,
    nuclear_charges). The first text line becomes the title of the
    molecule, the second one is returned as subtitle.
    """
    # the first two lines are free text
    title = f.readline().strip()
    subtitle = f.readline().strip()

    def read_grid_line(line):
        """Split a grid line into an integer and a vector of three floats"""
        words = line.split()
        count = int(words[0])
        # all coordinates in a cube file are in atomic units
        vector = np.array(
            [float(words[1]), float(words[2]), float(words[3])], float)
        return count, vector

    # number of atoms and origin of the grid
    natom, origin = read_grid_line(f.readline())
    # number of grid points and step vector, once per grid direction
    grid_lines = [read_grid_line(f.readline()) for _ in range(3)]
    nrep = np.array([count for count, _ in grid_lines], int)
    axes = np.array([vector for _, vector in grid_lines])

    numbers = np.zeros(natom, int)
    nuclear_charges = np.zeros(natom, float)
    coordinates = np.zeros((natom, 3), float)
    for i in range(natom):
        # atomic number, nuclear charge and position of one atom
        words = f.readline().split()
        number = int(words[0])
        charge = float(words[1])
        # all coordinates in a cube file are in atomic units
        position = np.array(
            [float(words[2]), float(words[3]), float(words[4])], float)
        numbers[i] = number
        nuclear_charges[i] = charge
        coordinates[i] = position

    molecule = Molecule(numbers, coordinates, title=title)
    return molecule, origin, axes, nrep, subtitle, nuclear_charges
def guess_geometry(graph, unit_cell=None, verbose=False):
    """Construct a molecular geometry based on a molecular graph.

       This routine does not require initial coordinates and will give a very
       rough picture of the initial geometry. Do not expect all details to be
       in perfect condition. A subsequent optimization with a more accurate
       level of theory is at least advisable.

       Argument:
        | ``graph``  --  The molecular graph of the system, see
                         :class:`molmod.molecular_graphs.MolecularGraph`

       Optional argument:
        | ``unit_cell``  --  periodic boundary conditions, see
                             :class:`molmod.unit_cells.UnitCell`
        | ``verbose``  --  Show optimizer progress when True

       Returns a Molecule with the numbers of the graph and the guessed
       coordinates. The starting point is random, so results differ per call.
    """
    N = len(graph.numbers)
    from molmod.minimizer import Minimizer, ConjugateGradient, \
        NewtonLineSearch, ConvergenceCondition, StopLossCondition

    search_direction = ConjugateGradient()
    line_search = NewtonLineSearch()
    convergence = ConvergenceCondition(grad_rms=1e-6, step_rms=1e-6)
    stop_loss = StopLossCondition(max_iter=500, fun_margin=0.1)

    ff = ToyFF(graph, unit_cell)

    def relax(start):
        # One minimizer run on the current force field settings.
        run = Minimizer(start, ff, search_direction, line_search,
                        convergence, stop_loss, anagrad=True, verbose=verbose)
        return run.x

    x = np.random.normal(0, 1, N * 3)

    # level 1 geometry optimization: graph based
    ff.dm_quad = 1.0
    x = relax(x)

    # level 2 geometry optimization: graph based + pauli repulsion
    ff.dm_quad = 1.0
    ff.dm_reci = 1.0
    x = relax(x)

    # Add a little noise to avoid saddle points
    x += np.random.uniform(-0.01, 0.01, len(x))

    # level 3 geometry optimization: bond lengths + pauli
    ff.dm_quad = 0.0
    ff.dm_reci = 0.2
    ff.bond_quad = 1.0
    x = relax(x)

    # level 4 geometry optimization: bond lengths + bending angles + pauli
    ff.bond_quad = 0.0
    ff.bond_hyper = 1.0
    ff.span_quad = 1.0
    x = relax(x)

    return Molecule(graph.numbers, x.reshape((N, 3)))
def stop_collecting(self):
    """Turn the collected atoms into a Molecule and forget the atom list.

    The new molecule is appended to ``self.molecules``; afterwards the
    ``current_atoms`` attribute is deleted.
    """
    collected = Molecule(self.current_atoms)
    self.molecules.append(collected)
    del self.current_atoms
def test_dump(self):
    """Write a PSF file that contains only the thf molecule."""
    thf = Molecule.from_file("input/thf.xyz")
    psf = PSFFile()
    psf.add_molecule(thf)
    psf.write_to_file("output/thf.psf")
def iter_test_molecules(self):
    """Yield three test molecules, each with a default graph.

    Every molecule gets a ``filename`` attribute with the bare file name
    (without the ``input`` directory).
    """
    for filename in ("tpa.xyz", "water.xyz", "thf_single.xyz"):
        path = os.path.join("input", filename)
        molecule = Molecule.from_file(path)
        molecule.filename = filename
        molecule.set_default_graph()
        yield molecule
def __next__(self):
    """Load the next molecule from the SDF file

       This method is part of the iterator protocol.

       One record is read from ``self.f``: three header lines (the first is
       the title), the counts line, the atom block, the bond block, the
       property block up to the end tag, and everything up to the ``$$$$``
       delimiter.

       Returns a Molecule with coordinates multiplied by ``angstrom``, a
       read-only ``formal_charges`` array and a ``graph`` built from the
       bond block.

       Raises FileFormatError when a counts, atom, bond or charge line
       cannot be parsed, and StopIteration when the file is exhausted.
    """
    title = next(self.f)
    if len(title) == 0:
        raise StopIteration
    title = title.strip()
    next(self.f)  # skip line
    next(self.f)  # skip empty line

    # counts line
    words = next(self.f).split()
    if len(words) < 2:
        raise FileFormatError(
            "Expecting at least two numbers at fourth line.")
    try:
        num_atoms = int(words[0])
        num_bonds = int(words[1])
    except ValueError:
        raise FileFormatError(
            "Expecting at least two numbers at fourth line.")

    # atom block
    numbers = np.zeros(num_atoms, int)
    coordinates = np.zeros((num_atoms, 3), float)
    for i in range(num_atoms):
        words = next(self.f).split()
        if len(words) < 4:
            raise FileFormatError(
                "Expecting at least four words on an atom line.")
        try:
            coordinates[i, 0] = float(words[0])
            coordinates[i, 1] = float(words[1])
            coordinates[i, 2] = float(words[2])
        except ValueError:
            raise FileFormatError(
                "Coordinates must be floating point numbers.")
        atom = periodic[words[3]]
        if atom is None:
            raise FileFormatError("Unrecognized atom symbol: %s" % words[3])
        numbers[i] = atom.number
    coordinates *= angstrom

    # bond block; atom indexes in the file start counting from one
    edges = []
    orders = np.zeros(num_bonds, int)
    for i in range(num_bonds):
        words = next(self.f).split()
        if len(words) < 3:
            raise FileFormatError(
                "Expecting at least three numbers on a bond line.")
        try:
            edges.append((int(words[0]) - 1, int(words[1]) - 1))
            orders[i] = int(words[2])
        except ValueError:
            raise FileFormatError(
                "Expecting at least three numbers on a bond line.")

    # property block: the tags consist of 'M', two spaces and a three-letter
    # code, so the payload of a charge line starts at column 6.
    formal_charges = np.zeros(len(numbers), int)
    line = next(self.f)
    while line != "M  END\n":
        if line.startswith("M  CHG"):
            # drop the first number which is the number of charges
            words = line[6:].split()[1:]
            # the remaining words are (atom index, charge) pairs
            for i in range(0, len(words) - 1, 2):
                try:
                    formal_charges[int(words[i]) - 1] = int(words[i + 1])
                except ValueError:
                    raise FileFormatError(
                        "Expecting only integer formal charges.")
        line = next(self.f)

    # Read on to the next molecule
    for line in self.f:
        if line == "$$$$\n":
            break

    molecule = Molecule(numbers, coordinates, title)
    molecule.formal_charges = formal_charges
    molecule.formal_charges.setflags(write=False)
    molecule.graph = MolecularGraph(edges, numbers, orders)
    return molecule
def test_rotsym_cyclopentane(self):
    """The rotational symmetry number computed for cyclopentane must be 1."""
    mol = Molecule.from_file("input/cyclopentane.xyz")
    mol.set_default_graph()
    self.assertEqual(compute_rotsym(mol, mol.graph), 1)
def test_rotsym_ethane(self):
    """The rotational symmetry number computed for ethane must be 6.

    A threshold of 0.01 is passed to ``compute_rotsym``.
    """
    mol = Molecule.from_file("input/ethane.xyz")
    mol.set_default_graph()
    self.assertEqual(compute_rotsym(mol, mol.graph, threshold=0.01), 6)
def random_dimer(molecule0, molecule1, thresholds, shoot_max, max_tries=1000):
    """Create a random dimer.

    molecule0 and molecule1 are placed in one reference frame at random
    relative positions. Interatomic distances are above the thresholds.
    Initially a dimer is created where one interatomic distance approximates
    the threshold value. Then the molecules are given an additional
    separation in the range [0,shoot_max].

    thresholds has the following format:
    {frozenset([atom_number1, atom_number2]): distance}

    The ``max_tries`` argument is accepted but not used by this routine.

    Returns a Molecule with the atoms of molecule0 followed by those of the
    moved molecule1. It has three extra attributes: ``direction`` (the
    displacement direction), ``atom0`` and ``atom1`` (the atoms that were
    initially brought in overlap).
    """
    # apply a random rotation to molecule1
    center = numpy.zeros(3, float)
    angle = numpy.random.uniform(0, 2*numpy.pi)
    axis = random_unit(3)
    rotation = rotation_around_center(center, angle, axis)
    cor1 = numpy.dot(molecule1.coordinates, rotation.r)

    # select a random atom in each molecule
    atom0 = numpy.random.randint(len(molecule0.numbers))
    atom1 = numpy.random.randint(len(molecule1.numbers))

    # translate molecule1 such that both atoms overlap
    cor1 += molecule0.coordinates[atom0] - cor1[atom1]

    # define a random direction
    direction = random_unit(3)
    cor1 += 1*direction

    # squared thresholds for every intermolecular pair of atoms
    shape = (len(molecule0.numbers), len(molecule1.numbers))
    threshold_mat = numpy.zeros(shape, float)
    distance_mat = numpy.zeros(shape, float)
    for i1, n1 in enumerate(molecule0.numbers):
        for i2, n2 in enumerate(molecule1.numbers):
            threshold_mat[i1,i2] = thresholds.get(frozenset([n1,n2]))**2

    def all_separated():
        # Recompute the squared distances for the current position of
        # molecule1 and compare them with the squared thresholds.
        distance_mat[:] = 0
        for i in 0,1,2:
            distance_mat[:] += numpy.subtract.outer(molecule0.coordinates[:,i], cor1[:,i])**2
        return (distance_mat > threshold_mat).all()

    # move molecule1 along this direction until all intermolecular atomic
    # distances are above the threshold values
    while True:
        cor1 += 0.1*direction
        if all_separated():
            break

    # translate over a random distance [0,shoot] along the same direction
    # (if necessary repeat until no overlap is found)
    while True:
        cor1 += direction*numpy.random.uniform(0,shoot_max)
        if all_separated():
            break

    # done
    dimer = Molecule(
        numpy.concatenate([molecule0.numbers, molecule1.numbers]),
        numpy.concatenate([molecule0.coordinates, cor1])
    )
    dimer.direction = direction
    dimer.atom0 = atom0
    dimer.atom1 = atom1
    return dimer
def next(self):
    """Load the next molecule from the SDF file

       This method is part of the iterator protocol.

       One record is read from ``self.f``: three header lines (the first is
       the title), the counts line, the atom block, the bond block, the
       property block up to the end tag, and everything up to the ``$$$$``
       delimiter.

       Returns a Molecule with coordinates multiplied by ``angstrom``, a
       read-only ``formal_charges`` array and a ``graph`` built from the
       bond block.

       Raises FileFormatError when a counts, atom, bond or charge line
       cannot be parsed, and StopIteration when the file is exhausted.
    """
    title = self.f.next()
    if len(title) == 0:
        raise StopIteration
    title = title.strip()
    self.f.next() # skip line
    self.f.next() # skip empty line

    # counts line
    words = self.f.next().split()
    if len(words) < 2:
        raise FileFormatError("Expecting at least two numbers at fourth line.")
    try:
        num_atoms = int(words[0])
        num_bonds = int(words[1])
    except ValueError:
        raise FileFormatError("Expecting at least two numbers at fourth line.")

    # atom block
    numbers = numpy.zeros(num_atoms, int)
    coordinates = numpy.zeros((num_atoms, 3), float)
    for i in xrange(num_atoms):
        words = self.f.next().split()
        if len(words) < 4:
            raise FileFormatError("Expecting at least four words on an atom line.")
        try:
            coordinates[i, 0] = float(words[0])
            coordinates[i, 1] = float(words[1])
            coordinates[i, 2] = float(words[2])
        except ValueError:
            raise FileFormatError("Coordinates must be floating point numbers.")
        atom = periodic[words[3]]
        if atom is None:
            raise FileFormatError("Unrecognized atom symbol: %s" % words[3])
        numbers[i] = atom.number
    coordinates *= angstrom

    # bond block; atom indexes in the file start counting from one
    edges = []
    orders = numpy.zeros(num_bonds, int)
    for i in xrange(num_bonds):
        words = self.f.next().split()
        if len(words) < 3:
            raise FileFormatError("Expecting at least three numbers on a bond line.")
        try:
            edges.append((int(words[0])-1, int(words[1])-1))
            orders[i] = int(words[2])
        except ValueError:
            raise FileFormatError("Expecting at least three numbers on a bond line.")

    # property block: the tags consist of 'M', two spaces and a three-letter
    # code, so the payload of a charge line starts at column 6.
    formal_charges = numpy.zeros(len(numbers), int)
    line = self.f.next()
    while line != "M  END\n":
        if line.startswith("M  CHG"):
            # drop the first number which is the number of charges
            words = line[6:].split()[1:]
            # the remaining words are (atom index, charge) pairs
            for i in xrange(0, len(words)-1, 2):
                try:
                    formal_charges[int(words[i])-1] = int(words[i+1])
                except ValueError:
                    raise FileFormatError("Expecting only integer formal charges.")
        line = self.f.next()

    # Read on to the next molecule
    for line in self.f:
        if line == "$$$$\n":
            break

    molecule = Molecule(numbers, coordinates, title)
    molecule.formal_charges = formal_charges
    molecule.formal_charges.setflags(write=False)
    molecule.graph = MolecularGraph(edges, numbers, orders)
    return molecule
def load_molecule(self, fn):
    """Load ``fn`` from the ``input`` directory and make sure it has a graph.

    A default graph is only set when the loaded molecule has none.

    Returns the molecule.
    """
    path = os.path.join("input", fn)
    loaded = Molecule.from_file(path)
    if loaded.graph is None:
        loaded.set_default_graph()
    return loaded
def tune_geometry(graph, mol, unit_cell=None, verbose=False):
    """Fine tune a molecular geometry, starting from a (very) poor guess of
       the initial geometry.

       Do not expect all details to be in perfect condition. A subsequent
       optimization with a more accurate level of theory is at least
       advisable.

       Arguments:
        | ``graph``  --  The molecular graph of the system, see
                         :class:`molmod.molecular_graphs.MolecularGraph`
        | ``mol``  --  A :class:`molmod.molecules.Molecule` object with the
                       initial guess of the coordinates

       Optional argument:
        | ``unit_cell``  --  periodic boundary conditions, see
                             :class:`molmod.unit_cells.UnitCell`
        | ``verbose``  --  Show optimizer progress when True

       Returns a new Molecule with the numbers of the graph and the tuned
       coordinates.
    """
    N = len(graph.numbers)
    from molmod.minimizer import Minimizer, ConjugateGradient, \
        NewtonLineSearch, ConvergenceCondition, StopLossCondition

    search_direction = ConjugateGradient()
    line_search = NewtonLineSearch()
    convergence = ConvergenceCondition(grad_rms=1e-6, step_rms=1e-6)
    stop_loss = StopLossCondition(max_iter=500, fun_margin=1.0)

    ff = ToyFF(graph, unit_cell)

    def relax(start):
        # One minimizer run on the current force field settings.
        run = Minimizer(start, ff, search_direction, line_search,
                        convergence, stop_loss, anagrad=True, verbose=verbose)
        return run.x

    x = mol.coordinates.ravel()

    # level 3 geometry optimization: bond lengths + pauli
    ff.dm_reci = 0.2
    ff.bond_quad = 1.0
    x = relax(x)

    # level 4 geometry optimization: bond lengths + bending angles + pauli
    ff.bond_quad = 0.0
    ff.bond_hyper = 1.0
    ff.span_quad = 1.0
    x = relax(x)

    return Molecule(graph.numbers, x.reshape((N, 3)))
def get_molecule(self, index=0):
    """Return the frame at ``index`` (default 0) as a Molecule.

    The molecule is built from the numbers of the trajectory and the
    geometry and title stored at the given index.
    """
    numbers = self.numbers
    coordinates = self.geometries[index]
    title = self.titles[index]
    return Molecule(numbers, coordinates, title)
def random_dimer(molecule0, molecule1, thresholds, shoot_max):
    """Create a random dimer.

       molecule0 and molecule1 are placed in one reference frame at random
       relative positions. Interatomic distances are above the thresholds.
       Initially a dimer is created where one interatomic distance
       approximates the threshold value. Then the molecules are given an
       additional separation in the range [0, shoot_max].

       thresholds has the following format:
       {frozenset([atom_number1, atom_number2]): distance}

       Returns a Molecule with the atoms of molecule0 followed by those of
       the moved molecule1. It has three extra attributes: ``direction``
       (the displacement direction), ``atom0`` and ``atom1`` (the atoms that
       were initially brought in overlap).
    """
    # apply a random rotation to molecule1
    center = np.zeros(3, float)
    angle = np.random.uniform(0, 2 * np.pi)
    axis = random_unit()
    rotation = Complete.about_axis(center, angle, axis)
    cor1 = np.dot(molecule1.coordinates, rotation.r)

    # select a random atom in each molecule
    atom0 = np.random.randint(len(molecule0.numbers))
    atom1 = np.random.randint(len(molecule1.numbers))

    # translate molecule1 such that both atoms overlap
    cor1 += molecule0.coordinates[atom0] - cor1[atom1]

    # define a random direction
    direction = random_unit()
    cor1 += 1 * direction

    # squared thresholds for every intermolecular pair of atoms
    shape = (len(molecule0.numbers), len(molecule1.numbers))
    threshold_mat = np.zeros(shape, float)
    distance_mat = np.zeros(shape, float)
    for i1, n1 in enumerate(molecule0.numbers):
        for i2, n2 in enumerate(molecule1.numbers):
            threshold_mat[i1, i2] = thresholds.get(frozenset([n1, n2]))**2

    def all_separated():
        # Recompute the squared distances for the current position of
        # molecule1 and compare them with the squared thresholds.
        distance_mat[:] = 0
        for i in 0, 1, 2:
            distance_mat[:] += np.subtract.outer(molecule0.coordinates[:, i], cor1[:, i])**2
        return (distance_mat > threshold_mat).all()

    # move molecule1 along this direction until all intermolecular atomic
    # distances are above the threshold values
    while True:
        cor1 += 0.1 * direction
        if all_separated():
            break

    # translate over a random distance [0, shoot] along the same direction
    # (if necessary repeat until no overlap is found)
    while True:
        cor1 += direction * np.random.uniform(0, shoot_max)
        if all_separated():
            break

    # done
    dimer = Molecule(np.concatenate([molecule0.numbers, molecule1.numbers]),
                     np.concatenate([molecule0.coordinates, cor1]))
    dimer.direction = direction
    dimer.atom0 = atom0
    dimer.atom1 = atom1
    return dimer