def get_smiles(base_path, molecules, extra=False):
    """
    Collect the SMILES representation of each requested molecule.

    For every molecule ID, the file ``<base_path>/<ID>/<ID>.mol`` is read
    as a "mol" file and written back out in the "smi" format.

    :param base_path: Directory to begin search in. Must not be None.
    :param molecules: List of strings representing molecule IDs.
    :param extra: If True (default False), each entry is a
        (SMILES, molecule ID) tuple instead of a bare SMILES string.
    :return: list of SMILES strings, or of (SMILES, ID) tuples when
        extra is True.
    :raises ValueError: if base_path is None.
    """
    if base_path is None:
        raise ValueError("No path given.")

    results = []
    for mol_id in molecules:
        mol_dir = join(base_path, mol_id)
        mol_path = join(mol_dir, "{}.mol".format(mol_id))
        adaptor = BabelMolAdaptor.from_file(mol_path, file_format="mol")
        # Drop the tab + newline pair from the written "smi" record.
        smi_text = adaptor.pybel_mol.write("smi").replace("\t\n", "")
        results.append((smi_text, mol_id) if extra else smi_text)
    return results
def run(self, site_property=None):
    """
    Generate the packmol input in a temporary scratch directory, execute
    packmol on it and load the packed molecule from the output file named
    by ``control_params["output"]``.

    Args:
        site_property (str): if set then the specified site property
            for the final packed molecule will be restored.

    Returns:
        Molecule object

    Raises:
        RuntimeError: if the output file does not exist after packmol
            has been run; the message carries the captured stdout/stderr.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        self._write_input(input_dir=tmp_dir)
        input_path = os.path.join(tmp_dir, self.input_file)
        # The generated input file is fed to packmol on stdin.
        with open(input_path, "r") as inp, Popen(
            self.packmol_bin, stdin=inp, stdout=PIPE, stderr=PIPE
        ) as proc:
            stdout, stderr = proc.communicate()

        out_path = os.path.join(self.control_params["output"])
        if not os.path.isfile(out_path):
            raise RuntimeError("Packmol execution failed. %s\n%s" % (stdout, stderr))

        adaptor = BabelMolAdaptor.from_file(
            out_path, self.control_params["filetype"])
        result = adaptor.pymatgen_mol
        print("packed molecule written to {}".format(
            self.control_params["output"]))
        if site_property:
            result = self.restore_site_properties(
                site_property=site_property, filename=out_path)
        return result
def test_from_file_return_all_molecules(self):
    """from_file with return_all_molecules=True is expected to give 302 adaptors for the multi-frame XYZ fixture."""
    xyz_path = os.path.join(PymatgenTest.TEST_FILES_DIR, "multiple_frame_xyz.xyz")
    all_adaptors = BabelMolAdaptor.from_file(xyz_path, "xyz", return_all_molecules=True)
    self.assertEqual(len(all_adaptors), 302)
def restore_site_properties(self, site_property="ff_map", filename=None):
    """
    Restore the site properties for the final packed molecule.

    The packed pdb file is read back residue by residue; each residue is
    converted with ``convert_obatoms_to_molecule`` and its sites are
    appended to a single molecule.

    Args:
        site_property (str): name of the site property to restore.
        filename (str): path to the final packed molecule. Defaults to
            ``control_params["output"]``.

    Returns:
        Molecule

    Raises:
        ValueError: if ``control_params["filetype"]`` is not "pdb".
    """
    # only for pdb
    if self.control_params["filetype"] != "pdb":
        # A bare ``raise`` here had no active exception to re-raise.
        raise ValueError(
            "Site properties can only be restored from pdb output, "
            "got filetype {!r}.".format(self.control_params["filetype"]))

    filename = filename or self.control_params["output"]
    bma = BabelMolAdaptor.from_file(filename, "pdb")
    pbm = pb.Molecule(bma._obmol)

    # Sanity check: the residue count must equal the sum of the "number"
    # entries in param_list.
    assert len(pbm.residues) == sum(x["number"] for x in self.param_list)

    packed_mol = self.convert_obatoms_to_molecule(
        pbm.residues[0].atoms,
        residue_name=pbm.residues[0].name,
        site_property=site_property)

    for resid in pbm.residues[1:]:
        mol = self.convert_obatoms_to_molecule(
            resid.atoms,
            residue_name=resid.name,
            site_property=site_property)
        for site in mol:
            packed_mol.append(site.species_and_occu, site.coords,
                              properties=site.properties)

    return packed_mol
def run(self, copy_to_current_on_exit=False, site_property=None):
    """
    Write the input file to the scratch directory, run packmol and return
    the packed molecule.

    Args:
        copy_to_current_on_exit (bool): Whether or not to copy the packmol
            input/output files from the scratch directory to the current
            directory.
        site_property (str): if set then the specified site property
            for the final packed molecule will be restored.

    Returns:
        Molecule object, or None (after printing packmol's captured
        stdout/stderr) when the expected output file was not produced.
    """
    scratch = tempfile.gettempdir()
    with ScratchDir(scratch, copy_to_current_on_exit=copy_to_current_on_exit) as scratch_dir:
        self._write_input(input_dir=scratch_dir)
        # Open the input in a ``with`` block so the handle passed to
        # packmol as stdin is closed once the process has finished.
        with open(os.path.join(scratch_dir, self.input_file), 'r') as packmol_input:
            p = Popen(self.packmol_bin, stdin=packmol_input, stdout=PIPE, stderr=PIPE)
            (stdout, stderr) = p.communicate()
        output_file = os.path.join(scratch_dir, self.control_params["output"])
        if os.path.isfile(output_file):
            packed_mol = BabelMolAdaptor.from_file(output_file,
                                                   self.control_params["filetype"])
            packed_mol = packed_mol.pymatgen_mol
            print("packed molecule written to {}".format(
                self.control_params["output"]))
            if site_property:
                packed_mol = self.restore_site_properties(site_property=site_property,
                                                          filename=output_file)
            return packed_mol
        print("Packmol execution failed")
        print(stdout, stderr)
        return None
def restore_site_properties(self, site_property="ff_map", filename=None):
    """
    Restore the site properties for the final packed molecule.

    The packed pdb file is read back residue by residue; each residue is
    converted with ``convert_obatoms_to_molecule`` and its sites are
    appended to a single molecule.

    Args:
        site_property (str): name of the site property to restore.
        filename (str): path to the final packed molecule. Defaults to
            ``control_params["output"]``.

    Returns:
        Molecule

    Raises:
        ValueError: if ``control_params["filetype"]`` is not "pdb".
    """
    # only for pdb
    filetype = self.control_params["filetype"]
    if filetype != "pdb":
        raise ValueError(
            "Site properties can only be restored from pdb output, "
            "got filetype {!r}.".format(filetype)
        )

    filename = filename or self.control_params["output"]
    bma = BabelMolAdaptor.from_file(filename, "pdb")
    pbm = pb.Molecule(bma._obmol)

    # Sanity check: the residue count must equal the sum of the "number"
    # entries in param_list.
    expected = sum(x["number"] for x in self.param_list)
    assert len(pbm.residues) == expected, (
        "Packed file has {} residues, expected {}.".format(len(pbm.residues), expected)
    )

    packed_mol = self.convert_obatoms_to_molecule(
        pbm.residues[0].atoms,
        residue_name=pbm.residues[0].name,
        site_property=site_property,
    )

    for resid in pbm.residues[1:]:
        mol = self.convert_obatoms_to_molecule(resid.atoms, residue_name=resid.name, site_property=site_property)
        for site in mol:
            packed_mol.append(site.species, site.coords, properties=site.properties)

    return packed_mol
def run(self, copy_to_current_on_exit=False):
    """
    Write the input file to the scratch directory, run packmol and return
    the packed molecule.

    Args:
        copy_to_current_on_exit (bool): Whether or not to copy the packmol
            input/output files from the scratch directory to the current
            directory.

    Returns:
        Molecule object, or None (after printing packmol's captured
        stdout/stderr) when the expected output file was not produced.
    """
    scratch = tempfile.gettempdir()
    with ScratchDir(scratch, copy_to_current_on_exit=copy_to_current_on_exit) as scratch_dir:
        self._write_input(input_dir=scratch_dir)
        packmol_bin = ['packmol']
        # Open the input in a ``with`` block so the stdin handle is closed.
        with open(os.path.join(scratch_dir, self.input_file), 'r') as packmol_input:
            p = Popen(packmol_bin, stdin=packmol_input, stdout=PIPE, stderr=PIPE)
            # Do not call p.wait() first: with piped stdout/stderr the child
            # can block on a full pipe and wait() would never return.
            # communicate() reads both pipes and waits for termination.
            (stdout, stderr) = p.communicate()
        output_file = os.path.join(scratch_dir, self.control_params["output"])
        if os.path.isfile(output_file):
            packed_mol = BabelMolAdaptor.from_file(output_file)
            print("packed molecule written to {}".format(
                self.control_params["output"]))
            return packed_mol.pymatgen_mol
        print("Packmol execution failed")
        print(stdout, stderr)
        return None
def read_mol(filename):
    """
    Reads a molecule based on file extension. For example, anything ending in
    a "xyz" is assumed to be a XYZ file. Supported formats include xyz,
    gaussian input (gjf|g03|g09|com|inp), Gaussian output (out|lis|log) and
    pymatgen's JSON serialized molecules (json|mson). Using openbabel, many
    more extensions are supported but requires openbabel to be installed.

    Args:
        filename (str): A filename to read from.

    Returns:
        A Molecule object.

    Raises:
        IOError: if a json/mson file does not decode to a Molecule.
        ValueError: if the file extension is not recognized.
    """
    # Match every pattern against the lower-cased base name.
    fname = os.path.basename(filename).lower()
    if fnmatch(fname, "*.xyz*"):
        return XYZ.from_file(filename).molecule
    if any(fnmatch(fname, "*.{}*".format(r))
           for r in ("gjf", "g03", "g09", "com", "inp")):
        return GaussianInput.from_file(filename).molecule
    if any(fnmatch(fname, "*.{}*".format(r))
           for r in ("out", "lis", "log")):
        return GaussianOutput(filename).final_structure
    if fnmatch(fname, "*.json*") or fnmatch(fname, "*.mson*"):
        with zopen(filename) as f:
            s = json.load(f, cls=MontyDecoder)
            if not isinstance(s, Molecule):
                raise IOError("File does not contain a valid serialized "
                              "molecule")
            return s

    # Raw string so that "\." is a regex escape rather than an invalid
    # string escape. "mol2" must come before "mol": alternation takes the
    # first alternative that matches, so ".mol2" would otherwise be read
    # with the "mol" format.
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)",
                  filename.lower())
    if m:
        return BabelMolAdaptor.from_file(filename,
                                         m.group(1)).pymatgen_mol

    raise ValueError("Unrecognized file extension!")
def run(self):
    """
    Write the packmol input file and one structure file per molecule into a
    scratch directory, run packmol on the input and read back the result.

    Returns:
        Molecule object, or None (after printing packmol's captured
        stdout/stderr) when the expected output file was not produced.
    """
    scratch = tempfile.gettempdir()
    with ScratchDir(scratch, copy_to_current_on_exit=True) as d:
        input_path = os.path.join(d, self.input_file)
        with open(input_path, 'wt', encoding="utf-8") as inp:
            # Global control parameters come first, one "key value" per line.
            for k, v in six.iteritems(self.control_params):
                inp.write('{} {}\n'.format(k, self._format_param_val(v)))
            # Then one "structure ... end structure" section per molecule,
            # each pointing at a structure file written next to the input.
            for idx, mol in enumerate(self.mols):
                a = BabelMolAdaptor(mol)
                pm = pb.Molecule(a.openbabel_mol)
                pm.write(self.control_params["filetype"],
                         filename=os.path.join(
                             d, '{}.{}'.format(
                                 idx, self.control_params["filetype"])
                         ).encode("ascii"),
                         overwrite=True)
                inp.write("\n")
                inp.write(
                    "structure {}.{}\n".format(
                        os.path.join(d, str(idx)),
                        self.control_params["filetype"]))
                for k, v in six.iteritems(self.param_list[idx]):
                    inp.write(
                        ' {} {}\n'.format(k, self._format_param_val(v)))
                inp.write('end structure\n')
        # Reopen the finished input for reading inside a ``with`` block so
        # the stdin handle is closed. stderr is piped as well so that the
        # failure report below has something to print.
        with open(input_path, 'r') as packmol_input:
            proc = Popen(['packmol'], stdin=packmol_input,
                         stdout=PIPE, stderr=PIPE)
            (stdout, stderr) = proc.communicate()
        output_file = os.path.join(d, self.control_params["output"])
        if os.path.isfile(output_file):
            packed_mol = BabelMolAdaptor.from_file(output_file)
            print("packed molecule written to {}".format(
                self.control_params["output"]))
            return packed_mol.pymatgen_mol
        print("Packmol execution failed")
        print(stdout, stderr)
        return None
def get_molecule(molfile):
    """
    Build a pymatgen Molecule from a molecule data file.

    The file is read in the "mol" format; hydrogens are then added, 3D
    coordinates are generated and a local optimisation is run before the
    result is converted to a pymatgen Molecule.

    :param molfile: Absolute path to structure file (.mol, .sdf, etc.)
    :return: Molecule.
    """
    adaptor = BabelMolAdaptor.from_file(molfile, file_format="mol")

    # The OpenBabel molecule carries no pymatgen Molecule information, so
    # all work is done on the adaptor and the Molecule is extracted last.
    adaptor.add_hydrogen()
    adaptor.make3d()
    adaptor.localopt()

    molecule = adaptor.pymatgen_mol
    return molecule
def read_mol(filename):
    """
    Reads a molecule based on file extension. For example, anything ending in
    a "xyz" is assumed to be a XYZ file. Supported formats include xyz,
    gaussian input (gjf|g03|g09|com|inp), Gaussian output (out|lis|log) and
    pymatgen's JSON serialized molecules (json|mson). Using openbabel, many
    more extensions are supported but requires openbabel to be installed.

    Args:
        filename (str): A filename to read from.

    Returns:
        A Molecule object.

    Raises:
        IOError: if a json/mson file does not decode to a Molecule.
        ValueError: if the file extension is not recognized.
    """
    # Every glob below is matched against the lower-cased base name.
    fname = os.path.basename(filename).lower()

    def _has_ext(extensions):
        # True when the base name contains ".<ext>" for any listed extension.
        return any(fnmatch(fname, "*.{}*".format(ext)) for ext in extensions)

    if _has_ext(["xyz"]):
        return XYZ.from_file(filename).molecule
    if _has_ext(["gjf", "g03", "g09", "com", "inp"]):
        return GaussianInput.from_file(filename).molecule
    if _has_ext(["out", "lis", "log"]):
        return GaussianOutput(filename).final_structure
    if _has_ext(["json", "mson"]):
        with zopen(filename) as f:
            s = json.load(f, cls=MontyDecoder)
            if not isinstance(s, Molecule):
                raise IOError("File does not contain a valid serialized "
                              "molecule")
            return s

    # Raw string: "\." is a regex escape, not a (invalid) string escape.
    # "mol2" is listed ahead of "mol" because alternation picks the first
    # alternative that matches; otherwise ".mol2" would be read as "mol".
    m = re.search(r"\.(pdb|mol2|mol|mdl|sdf|sd|ml2|sy2|cml|mrv)",
                  filename.lower())
    if m:
        return BabelMolAdaptor.from_file(filename,
                                         m.group(1)).pymatgen_mol

    raise ValueError("Unrecognized file extension!")
def write_pdb(self, mol, filename, name=None, num=None):
    """
    Dump the molecule into a pdb file with a custom residue name and number.

    Args:
        mol: molecule to write; its ``to`` method is used to produce the
            intermediate pdb file.
        filename (str): path of the pdb file to write.
        name (str): residue name. Defaults to "ml" followed by ``num``.
        num (int): residue number. Defaults to 1.
    """
    # ugly hack to get around the openbabel issues with inconsistent
    # residue labelling: round-trip the molecule through a temporary pdb
    # file inside a scratch directory.
    tmp_root = tempfile.gettempdir()
    with ScratchDir(tmp_root, copy_to_current_on_exit=False):
        mol.to(fmt="pdb", filename="tmp.pdb")
        adaptor = BabelMolAdaptor.from_file("tmp.pdb", "pdb")

    if not num:
        num = 1
    if not name:
        name = "ml{}".format(num)

    pybel_mol = pb.Molecule(adaptor._obmol)
    # Give every residue the same name and number.
    for residue in pybel_mol.residues:
        residue.OBResidue.SetName(name)
        residue.OBResidue.SetNum(num)

    pybel_mol.write(format="pdb", filename=filename, overwrite=True)
def test_from_file(self):
    """The ethane pdb fixture read through from_file should have formula "H6 C2"."""
    pdb_path = os.path.join(PymatgenTest.TEST_FILES_DIR, "molecules/Ethane_e.pdb")
    ethane = BabelMolAdaptor.from_file(pdb_path, "pdb").pymatgen_mol
    self.assertEqual(ethane.formula, "H6 C2")
def main():
    """
    Command-line entry point: parse the arguments, build an energy
    evaluator and an IonPlacer from them, and run the placement.

    With ``--evaluator hardsphere`` (the default) the inputs are read as
    QcOutput files; otherwise the molecule and fragment files are read via
    BabelMolAdaptor using each file's extension as the format, and a
    SemiEmpricalQuatumMechanicalEnergyEvaluator is used.

    Raises:
        ValueError: if the number of fragments differs from the number of
            values given to --nums_fragments.
    """
    def gcd(a, b):
        """Return the greatest common divisor of a and b (recursive Euclid)."""
        if b == 0:
            return a
        else:
            return gcd(b, a % b)

    def lcm(a, b):
        """Return a * b divided by gcd(a, b)."""
        # NOTE(review): "/" is true division on Python 3, so this yields a
        # float there - confirm whether an integer result is expected.
        return a * b / gcd(a, b)

    import argparse
    parser = argparse.ArgumentParser(
        description="Place salt around a molecule")
    parser.add_argument("-m", "--molecule", dest="molecule", type=str, required=True,
                        help="the file name of molecule")
    parser.add_argument("-l", "--ligand", dest="fragments", type=str, nargs='+', required=True,
                        help="the list of fragment file names to to be placed around the molecule")
    parser.add_argument("-n", "--nums_fragments", dest="nums_fragments", type=int, nargs='+', required=True,
                        help="the number of each fragment, the order must be the same with FRAGMENTS")
    parser.add_argument("-c", "--charge", dest="charge", type=int, required=True,
                        help="total charge of the system")
    parser.add_argument("-t", "--taboo_tolerance", dest="taboo_tolerance", type=float, default=1.0,
                        help="The radius to taboo a solution (in Angstrom)")
    parser.add_argument("-r", "--ratio_taboo_particles", dest="ratio_taboo_particles", type=float, default=0.5,
                        help="ratio of particle within the tolerance to consider taboo current solution")
    # NOTE(review): options.outputfile is parsed but not read anywhere in
    # this function - presumably consumed elsewhere; verify.
    parser.add_argument("-o", "--outputfile", dest="outputfile", type=str, required=True,
                        help="the file name of the aligned conformer")
    parser.add_argument("-i", "--iterations", dest="iterations", type=int, default=600,
                        help="maximum number of evaluations")
    parser.add_argument("-s", "--size", dest="size", type=int, default=15,
                        help="population size")
    parser.add_argument("-k", "--num_neighbours", dest="num_neighbours", type=int, default=2,
                        help="number of neighbours")
    parser.add_argument("--force_ordered_fragment", dest="force_ordered_fragment", action="store_true",
                        help="set this option to keep the fragment of the same in the order of input along the X-axis")
    parser.add_argument("--topology", dest="topology", choices=["ring", "star"], type=str, default="ring",
                        help="the topology of the PSO information network")
    parser.add_argument("--initial_guess", dest="initial_guess", choices=["breadth", "center", "volume"],
                        default="breadth",
                        help="where should particles should be initially put")
    parser.add_argument("--bound_setter", dest="bound_setter", choices=["chain", "volume"], default="chain",
                        help="method to set the bound conditions of PSO")
    parser.add_argument("--always_write_best", dest="always_write_best", action="store_true",
                        help="enable this option to output the best structure at every iteration")
    parser.add_argument("--random_seed", dest="random_seed", default=None, type=int,
                        help="random seed for PSO, an integer is expected")
    parser.add_argument("--max_generations_each_conformer", dest="max_generations_each_conformer", default=100,
                        type=int,
                        help="maximum generations for each conformer")
    parser.add_argument("-e", "--evaluator", dest="evaluator", type=str, default="hardsphere",
                        choices=["hardsphere", "sqm"], help="Energy Evaluator")
    options = parser.parse_args()
    if options.evaluator == 'hardsphere':
        qcout_molecule = QcOutput(options.molecule)
        # NOTE(review): the parser above defines no "cation" or "anion"
        # destination, so these two attribute reads look like they would
        # raise AttributeError - confirm how this branch is meant to get
        # its cation/anion inputs.
        qcout_cation = QcOutput(options.cation)
        qcout_anion = QcOutput(options.anion)
        # The last entry of data[0]["molecules"] is used for each output.
        total_charge_cation = qcout_cation.data[0]["molecules"][-1].charge
        total_charge_anion = qcout_anion.data[0]["molecules"][-1].charge
        total_charge_mol = qcout_molecule.data[0]["molecules"][-1].charge
        # NOTE(review): total_charge_mol, num_cation and num_anion are
        # assigned here but not used later in this function.
        num_lcm = lcm(total_charge_cation, -total_charge_anion)
        num_cation = num_lcm / total_charge_cation
        num_anion = num_lcm / -total_charge_anion
        pymatgen_mol_molecule = qcout_molecule.data[0]["molecules"][-1]
        pymatgen_mol_cation = qcout_cation.data[0]["molecules"][-1]
        pymatgen_mol_anion = qcout_anion.data[0]["molecules"][-1]
        # noinspection PyProtectedMember
        molecule = BabelMolAdaptor(pymatgen_mol_molecule)._obmol
        # noinspection PyProtectedMember
        obmol_cation = BabelMolAdaptor(pymatgen_mol_cation)._obmol
        # noinspection PyProtectedMember
        obmol_anion = BabelMolAdaptor(pymatgen_mol_anion)._obmol
        energy_evaluator = HardSphereElectrostaticEnergyEvaluator.from_qchem_output(
            qcout_molecule, qcout_cation, qcout_anion)
        fragments = [obmol_cation, obmol_anion]
    else:
        # The file extension (without the leading dot) is passed as the
        # format of each input file.
        # noinspection PyProtectedMember
        molecule = BabelMolAdaptor.from_file(options.molecule,
                                             os.path.splitext(
                                                 options.molecule)[1][
                                                 1:])._obmol
        fragments = []
        for frag_file in options.fragments:
            file_format = os.path.splitext(frag_file)[1][1:]
            # noinspection PyProtectedMember
            fragments.append(
                BabelMolAdaptor.from_file(frag_file, file_format)._obmol)
        energy_evaluator = SemiEmpricalQuatumMechanicalEnergyEvaluator(
            molecule, fragments, options.nums_fragments,
            total_charge=options.charge,
            taboo_tolerance_ang=options.taboo_tolerance,
            force_order_fragment=options.force_ordered_fragment,
            bound_setter=options.bound_setter)
    # One count per fragment is required.
    if len(fragments) != len(options.nums_fragments):
        raise ValueError(
            "you must specify the duplicated count for every fragment")
    placer = IonPlacer(molecule=molecule, fragments=fragments,
                       nums_fragments=options.nums_fragments,
                       energy_evaluator=energy_evaluator,
                       taboo_tolerance_ang=options.taboo_tolerance,
                       taboo_tolerance_particle_ratio=options.ratio_taboo_particles,
                       topology=options.topology,
                       initial_guess=options.initial_guess,
                       bound_setter=options.bound_setter,
                       always_write_best=options.always_write_best,
                       random_seed=options.random_seed,
                       max_generations_each_conformer=options.max_generations_each_conformer)
    # The evaluator is given a back-reference to the placer before placing.
    energy_evaluator.arranger = placer
    placer.place(max_evaluations=options.iterations,
                 pop_size=options.size,
                 neighborhood_size=options.num_neighbours)
    print('It took {:.1f} seconds to place the salt'.format(placer
                                                            .playing_time))
def test_from_file(self):
    """The ethane pdb fixture read through from_file should have formula "H6 C2"."""
    pdb_path = os.path.join(test_dir, "Ethane_e.pdb")
    ethane = BabelMolAdaptor.from_file(pdb_path, "pdb").pymatgen_mol
    self.assertEqual(ethane.formula, "H6 C2")
def test_from_file_return_all_molecules(self):
    """from_file with return_all_molecules=True is expected to give 302 adaptors for the multi-frame XYZ fixture."""
    xyz_path = os.path.join(test_dir, "multiple_frame_xyz.xyz")
    all_adaptors = BabelMolAdaptor.from_file(
        xyz_path, "xyz", return_all_molecules=True)
    self.assertEqual(len(all_adaptors), 302)
def __init__(self, molecule, optimize=False):
    """
    Instantiation method for FunctionalGroupExtractor.

    :param molecule: Either a filename, a pymatgen.core.structure.Molecule
        object, or a pymatgen.analysis.graphs.MoleculeGraph object.
    :param optimize: Default False. If True, then the input molecule will
        be modified: hydrogens are added, 3D coordinates are generated and
        a local optimisation is run through BabelMolAdaptor.
    :raises ValueError: if a filename cannot be read (OSError) or if the
        input is not a str, Molecule or MoleculeGraph.
    """

    def _optimized(adaptor):
        # The OpenBabel molecule carries no pymatgen Molecule information,
        # so the work is done on the adaptor and the Molecule extracted last.
        adaptor.add_hydrogen()
        adaptor.make3d()
        adaptor.localopt()
        return adaptor.pymatgen_mol

    self.molgraph = None

    if isinstance(molecule, str):
        try:
            if optimize:
                self.molecule = _optimized(
                    BabelMolAdaptor.from_file(molecule, file_format="mol"))
            else:
                self.molecule = Molecule.from_file(molecule)
        except OSError:
            raise ValueError("Input must be a valid molecule file, a "
                             "Molecule object, or a MoleculeGraph object.")

    elif isinstance(molecule, Molecule):
        if optimize:
            self.molecule = _optimized(BabelMolAdaptor(molecule))
        else:
            self.molecule = molecule

    elif isinstance(molecule, MoleculeGraph):
        if optimize:
            # The supplied graph is not reused for the modified molecule;
            # a new graph is built below.
            self.molecule = _optimized(BabelMolAdaptor(molecule.molecule))
        else:
            self.molecule = molecule.molecule
            self.molgraph = molecule

    else:
        # The message pieces previously joined without a space ("bestr").
        raise ValueError("Input to FunctionalGroupExtractor must be "
                         "str, Molecule, or MoleculeGraph.")

    if self.molgraph is None:
        self.molgraph = MoleculeGraph.with_local_env_strategy(self.molecule,
                                                              OpenBabelNN(),
                                                              reorder=False,
                                                              extend_structure=False)

    # Assign a specie and coordinates to each node in the graph,
    # corresponding to the Site in the Molecule object
    self.molgraph.set_node_attributes()

    self.species = nx.get_node_attributes(self.molgraph.graph, "specie")
def __init__(self, molecule, optimize=False):
    """
    Instantiation method for FunctionalGroupExtractor.

    :param molecule: Either a filename, a pymatgen.core.structure.Molecule
        object, or a pymatgen.analysis.graphs.MoleculeGraph object.
    :param optimize: Default False. If True, then the input molecule will
        be modified: hydrogens are added, 3D coordinates are generated and
        a local optimisation is run through BabelMolAdaptor.
    :raises ValueError: if a filename cannot be read (OSError) or if the
        input is not a str, Molecule or MoleculeGraph.
    """

    def _run_optimization(adaptor):
        # Shared by all three input kinds: add hydrogens, build 3D
        # coordinates, optimise locally, then convert back to a Molecule.
        adaptor.add_hydrogen()
        adaptor.make3d()
        adaptor.localopt()
        return adaptor.pymatgen_mol

    self.molgraph = None

    if isinstance(molecule, str):
        try:
            if optimize:
                adaptor = BabelMolAdaptor.from_file(molecule, file_format="mol")
                self.molecule = _run_optimization(adaptor)
            else:
                self.molecule = Molecule.from_file(molecule)
        except OSError:
            raise ValueError("Input must be a valid molecule file, a "
                             "Molecule object, or a MoleculeGraph object.")

    elif isinstance(molecule, Molecule):
        if optimize:
            self.molecule = _run_optimization(BabelMolAdaptor(molecule))
        else:
            self.molecule = molecule

    elif isinstance(molecule, MoleculeGraph):
        if optimize:
            # The supplied graph is not reused for the modified molecule;
            # a new graph is built below.
            self.molecule = _run_optimization(BabelMolAdaptor(molecule.molecule))
        else:
            self.molecule = molecule.molecule
            self.molgraph = molecule

    else:
        # The message pieces previously joined without a space ("bestr").
        raise ValueError("Input to FunctionalGroupExtractor must be "
                         "str, Molecule, or MoleculeGraph.")

    if self.molgraph is None:
        self.molgraph = MoleculeGraph.with_local_env_strategy(
            self.molecule,
            OpenBabelNN(),
            reorder=False,
            extend_structure=False)

    # Assign a specie and coordinates to each node in the graph,
    # corresponding to the Site in the Molecule object
    self.molgraph.set_node_attributes()

    self.species = nx.get_node_attributes(self.molgraph.graph, "specie")