def get_smiles(molecule):
    """
    Return the canonical, isomeric SMILES RDKit writes for `molecule`.

    The molecule is converted to an RDKit molecule using its canonical
    atom ordering, sanitized, given stereochemistry from its 3D
    coordinates, and passed through ``RemoveHs`` before the SMILES
    is written.

    Parameters
    ----------
    molecule : :class:`.Molecule`
        The molecule whose SMILES is required.

    Returns
    -------
    :class:`str`
        The SMILES.

    """

    canonically_ordered = molecule.with_canonical_atom_ordering()
    mol_with_hs = canonically_ordered.to_rdkit_mol()

    # Sanitization and stereo assignment both happen before the
    # RemoveHs call below, i.e. on the molecule as converted.
    rdkit.SanitizeMol(mol_with_hs)
    rdkit.AssignStereochemistryFrom3D(mol_with_hs)

    stripped_mol = rdkit.RemoveHs(mol_with_hs)
    smiles_options = {"isomericSmiles": True, "canonical": True}
    return rdkit.MolToSmiles(mol=stripped_mol, **smiles_options)
def generate_inchi_and_xyz(mol_string: str, special_cases: bool = True) -> Tuple[str, str]:
    """Generate the XYZ coordinates and InChI string for a molecule using a standard procedure.

    We use the following deterministic procedure:

    1. Generate 3D coordinates with RDKit, using a fixed random number seed
    2. Assign yet-undetermined stereochemistry based on the 3D geometry
    3. Generate an InChI string for the molecule

    We then have post-processing steps for common mistakes in generating geometries:

    1. Ensuring cyclopropenyl groups are planar

    The whole procedure runs while holding the module-level ``_generate_lock``.

    Args:
        mol_string: SMILES or InChI string
        special_cases: Whether to perform the post-processing
    Returns:
        - InChI string for the molecule
        - XYZ coordinates for the molecule. The comment (second) line of the
          XYZ text is the InChI string as generated before post-processing.
    Raises:
        ValueError: If RDKit fails to embed a 3D conformer for the molecule
    """
    with _generate_lock:
        # Generate 3D coordinates for the molecule
        mol = parse_from_molecule_string(mol_string)
        mol = Chem.AddHs(mol)

        # EmbedMolecule reports failure with a negative conformer ID; stop here
        # with a clear message instead of a "bad conformer" error further down.
        if AllChem.EmbedMolecule(mol, randomSeed=1) < 0:
            raise ValueError(f'Failed to generate 3D coordinates for: {mol_string}')
        AllChem.MMFFOptimizeMolecule(mol)

        # Generate an InChI string with stereochemistry information
        AllChem.AssignStereochemistryFrom3D(mol)
        inchi = Chem.MolToInchi(mol)

        # Save geometry as 3D coordinates: atom count, comment line, one line per atom
        conf = mol.GetConformer()
        xyz_lines = [f"{mol.GetNumAtoms()}", inchi]
        for i, atom in enumerate(mol.GetAtoms()):
            symbol = atom.GetSymbol()
            pos = conf.GetAtomPosition(i)
            xyz_lines.append(f"{symbol} {pos[0]} {pos[1]} {pos[2]}")
        xyz = "\n".join(xyz_lines) + "\n"

        # Special cases for odd kinds of molecules.
        # Strings are immutable, so the corrected geometry must be taken from
        # the helper's return value; discarding it made this step a no-op.
        if special_cases:
            xyz = fix_cyclopropenyl(xyz, mol_string)

        return inchi, xyz