def structure(self) -> mtr.Structure:
    """Build a structure from the last geometry stored in the cclib output.

    Returns:
        A ``mtr.Structure`` with one atom per entry of
        ``self.cclib_out.atomnos``, positioned at the final set of
        coordinates in ``self.cclib_out.atomcoords``.
    """
    parsed = self.cclib_out
    # index -1 on the leading axis selects the final geometry; the
    # coordinates are treated as angstroms
    final_positions = parsed.atomcoords[-1, :, :] * mtr.angstrom
    atom_list = [
        mtr.Atom(element=atomic_number, position=position)
        for atomic_number, position in zip(parsed.atomnos, final_positions)
    ]
    return mtr.Structure(*atom_list)
def _structure_from_pubchem_compound(compound: pcp.Compound) -> mtr.Structure:
    """Convert a PubChem compound record into a structure.

    Args:
        compound: Compound whose ``atoms`` each expose ``element``, ``x``,
            ``y`` and ``z``.

    Returns:
        A ``mtr.Structure`` containing one atom per entry of
        ``compound.atoms``, in the same order.
    """
    # FIXME: assumes the pubchem distance unit is angstrom - is this correct??
    built = []
    for record in compound.atoms:
        symbol = record.element
        position = (record.x, record.y, record.z) * mtr.angstrom
        built.append(mtr.Atom(element=symbol, position=position))
    return mtr.Structure(*built)
def _structure_from_identifier(
    smiles: Optional[str] = None,
    inchi: Optional[str] = None,
    num_conformers: int = 25,
) -> mtr.Structure:
    """Generate a 3D structure from a SMILES or InChI string using RDKit.

    The identifier is parsed, sanitized and hydrogenated. Conformers are then
    embedded with ETKDG parameters, and the conformer with the lowest
    force-field energy is converted to a structure. MMFF is used when it has
    parameters for the whole molecule, UFF otherwise. Energies are evaluated
    on the embedded geometries as they are; no optimization is run here.

    Args:
        smiles: SMILES string. Used in preference to ``inchi`` if both are
            given.
        inchi: InChI string. Only used when ``smiles`` is None.
        num_conformers: Number of conformers requested from the embedding.

    Returns:
        A ``mtr.Structure`` built from the lowest-energy conformer.

    Raises:
        ValueError: If neither identifier is given, if the identifier cannot
            be parsed, if the molecule cannot be sanitized, or if no
            conformer could be embedded.
    """
    # for motivation on generating 25 (as opposed to, say, 10 or 100)
    # conformers, see:
    # https://github.com/rdkit/UGM_2015/blob/master/Presentations/ETKDG.SereinaRiniker.pdf
    rdkit.RDLogger.DisableLog("rdApp.*")

    if smiles is not None:
        mol = rdkit.Chem.MolFromSmiles(smiles, sanitize=False)
    elif inchi is not None:
        mol = rdkit.Chem.MolFromInchi(inchi, sanitize=False)
    else:
        raise ValueError(
            "Either SMILES or InChi required to generate structure.")

    # RDKit reports a parse failure by returning None rather than raising;
    # without this check the failure would surface as an AttributeError below
    if mol is None:
        raise ValueError("Cannot parse identifier into RDKit molecule.")

    # sanitize
    try:
        mol.UpdatePropertyCache(False)
        mol = rdkit.Chem.Mol(mol.ToBinary())
        rdkit.Chem.SanitizeMol(mol)
    except ValueError as err:
        raise ValueError("Cannot sanitize RDKit molecule.") from err

    # hydrogenate
    mol = rdkit.Chem.AddHs(mol)

    # embed to generate 3D coords; a return code of -1 means the embedding
    # failed, in which case random starting coordinates are used from here on
    embedding_parameters = rdkit.Chem.AllChem.ETKDG()
    embed_return_code = rdkit.Chem.AllChem.EmbedMolecule(
        mol=mol, params=embedding_parameters)
    if embed_return_code == -1:
        embedding_parameters.useRandomCoords = True
        rdkit.Chem.AllChem.EmbedMolecule(mol=mol, params=embedding_parameters)

    # embed multiple conformers and find one with lowest energy
    rdkit.Chem.AllChem.EmbedMultipleConfs(mol,
                                          numConfs=num_conformers,
                                          params=embedding_parameters)

    # MMFF seems to give slightly better geometries, so it is preferred
    # if possible
    if rdkit.Chem.AllChem.MMFFHasAllMoleculeParams(mol=mol):
        mmff_props = rdkit.Chem.AllChem.MMFFGetMoleculeProperties(mol=mol)
        rdkit.Chem.AllChem.MMFFSanitizeMolecule(mol=mol)

        def energy(conformer):
            return rdkit.Chem.AllChem.MMFFGetMoleculeForceField(
                mol=conformer.GetOwningMol(),
                pyMMFFMolProperties=mmff_props,
                confId=conformer.GetId(),
            ).CalcEnergy()
    else:

        def energy(conformer):
            return rdkit.Chem.AllChem.UFFGetMoleculeForceField(
                mol=conformer.GetOwningMol(),
                confId=conformer.GetId()).CalcEnergy()

    conformers = list(mol.GetConformers())
    if not conformers:
        # previously this case failed inside min() with an opaque
        # "empty sequence" message
        raise ValueError("Could not embed any conformers for molecule.")
    conformer = min(conformers, key=energy)

    # convert to Structure
    symbols = (a.GetSymbol() for a in conformer.GetOwningMol().GetAtoms())
    # FIXME: assumes the RDKIT distance unit is angstrom - is this correct??
    # NOTE: using conformer.GetPositions sometimes causes
    # a seg fault (RDKit) - use GetAtomPosition instead
    atoms = (mtr.Atom(
        element=symbol,
        position=conformer.GetAtomPosition(i) * mtr.angstrom,
    ) for i, symbol in enumerate(symbols))
    return mtr.Structure(*atoms)
def __add__(self, other: mtr.Structure) -> mtr.Structure:
    """Return a new structure holding this structure's atoms, then ``other``'s.

    Args:
        other: Structure whose atoms are appended after this one's.

    Returns:
        A new ``mtr.Structure``; neither operand is modified here.
    """
    # NOTE(review): the constructor receives a single tuple of atoms here,
    # while other call sites in this file unpack atoms as positional
    # arguments - confirm which form mtr.Structure expects.
    combined = tuple(self.atoms) + tuple(other.atoms)
    return mtr.Structure(combined)
import materia as mtr

# NOTE(review): `ase` (including `ase.spacegroup` and `ase.io`) is used below
# but is not imported in this snippet - presumably imported elsewhere; confirm.


def _fully_bonded_atoms(structure):
    """Return the atoms of ``structure`` that have a complete set of bonds.

    An atom is kept when it is a nitrogen (Z == 7) with exactly three
    perceived bonds or a hydrogen (Z == 1) with exactly one perceived bond.
    """
    kept = []
    for index, neighbors in structure.perceive_bonds().items():
        is_bonded_nitrogen = (structure.atoms[index].Z == 7
                              and len(neighbors) == 3)
        is_bonded_hydrogen = (structure.atoms[index].Z == 1
                              and len(neighbors) == 1)
        if is_bonded_nitrogen or is_bonded_hydrogen:
            kept.append(structure.atoms[index])
    return tuple(kept)


# build a 3x3x3 supercell from a single ammonia molecule and write it out
amm = mtr.Structure.read("~/ammonia.xyz")
basis = ase.Atoms(amm.atomic_symbols, amm.atomic_positions.T)
cell_edge = 4.9621636999999996
ase_crystal = ase.spacegroup.crystal(
    basis=basis,
    spacegroup=198,
    cellpar=[cell_edge, cell_edge, cell_edge, 90, 90, 90],
    size=(3, 3, 3),
)
ase.io.write("~/ammonia_crystal.xyz", ase_crystal)

# read the supercell back and repeatedly discard atoms with incomplete
# bonding until every remaining atom passes the filter
crystal = mtr.Structure.read("~/ammonia_crystal.xyz")
atoms = _fully_bonded_atoms(crystal)
bonds = crystal.perceive_bonds()
while len(bonds) > len(atoms):
    bonds = crystal.perceive_bonds()
    atoms = _fully_bonded_atoms(crystal)
    crystal = mtr.Structure(atoms)

crystal.write("~/good_ammonia_crystal.xyz", overwrite=True)