Exemplo n.º 1
0
Arquivo: mol.py Projeto: cgrambow/bac
def str_to_mol(identifier, single_bonds=False):
    """
    Build a molecule from a string identifier.

    Identifiers that start with 'InChI' are parsed with ``fromInChI`` (using
    the 'rdkit-first' backend); every other identifier is parsed with
    ``fromSMILES``.

    Args:
        identifier: InChI or SMILES string
        single_bonds: if true, return the result of ``toSingleBonds()`` on the
            parsed molecule instead of the parsed molecule itself
    """
    is_inchi = identifier.startswith('InChI')
    blank = molecule.Molecule()
    if is_inchi:
        parsed = blank.fromInChI(identifier, backend='rdkit-first')
    else:
        parsed = blank.fromSMILES(identifier)
    return parsed.toSingleBonds() if single_bonds else parsed
Exemplo n.º 2
0
def _rdkit_translator(input_object, identifier_type, mol=None):
    """
    Translate between string identifiers and :class:`Molecule` objects with RDKit.

    The direction is chosen from the type of ``input_object``:

    * a `str` is read as an identifier of kind ``identifier_type`` and a
      :class:`Molecule` is returned;
    * a :class:`Molecule` is written out as an identifier of kind
      ``identifier_type`` and that string is returned.

    Args:
        input_object: either molecule or string identifier
        identifier_type: format of string identifier
            'inchi'    -> InChI
            'inchikey' -> InChI Key (writing only)
            'sma'      -> SMARTS
            'smi'      -> SMILES
        mol: molecule object for output (optional); only used when reading a
            string, a new :class:`Molecule` is created when omitted

    Raises:
        DependencyError: ``identifier_type`` is 'inchi' but RDKit reports that
            InChI support is not available
        ValueError: the identifier type is not supported for the requested
            direction, RDKit returned ``None`` for the string, or
            ``input_object`` is neither a `str` nor a :class:`Molecule`
    """
    if identifier_type == 'inchi' and not Chem.inchi.INCHI_AVAILABLE:
        raise DependencyError("RDKit installed without InChI. Please reinstall to read and write InChI strings.")

    if isinstance(input_object, str):
        # Reading: string identifier -> Molecule
        if identifier_type == 'inchi':
            parsed = Chem.inchi.MolFromInchi(input_object, removeHs=False)
        elif identifier_type == 'sma':
            parsed = Chem.MolFromSmarts(input_object)
        elif identifier_type == 'smi':
            parsed = Chem.MolFromSmiles(input_object)
        else:
            raise ValueError('Identifier type {0} is not supported for reading using RDKit.'.format(identifier_type))

        # RDKit hands back None when it cannot parse the string
        if parsed is None:
            raise ValueError("Could not interpret the identifier {0!r}".format(input_object))

        target = mm.Molecule() if mol is None else mol
        return fromRDKitMol(target, parsed)

    if not isinstance(input_object, mm.Molecule):
        raise ValueError('Unexpected input format. Should be a Molecule or a string.')

    # Writing: Molecule -> string identifier.
    # The RDKit molecule is built before the identifier type is validated, and
    # sanitization is skipped only for SMILES output.
    sanitize = identifier_type != 'smi'
    rdkit_form = toRDKitMol(input_object, sanitize=sanitize)

    if identifier_type == 'inchi':
        return Chem.inchi.MolToInchi(rdkit_form, options='-SNon')
    if identifier_type == 'inchikey':
        # The key is derived from the InChI string produced by toInChI()
        return Chem.inchi.InchiToInchiKey(toInChI(input_object))
    if identifier_type == 'sma':
        return Chem.MolToSmarts(rdkit_form)
    if identifier_type == 'smi':
        # Non-aromatic molecules are written as kekulized SMILES
        if input_object.isAromatic():
            return Chem.MolToSmiles(rdkit_form)
        return Chem.MolToSmiles(rdkit_form, kekuleSmiles=True)

    raise ValueError('Identifier type {0} is not supported for writing using RDKit.'.format(identifier_type))
Exemplo n.º 3
0
Arquivo: mol.py Projeto: cgrambow/q2t
def geo_to_rmg_mol(geo):
    """
    Convert a geometry into a molecule.

    Args:
        geo: pair of (symbols, coords)

    A geometry made of exactly two 'H' symbols is loaded directly through
    ``Molecule.fromXYZ``; any other geometry goes through
    ``geo_to_pybel_mol`` followed by ``pybel_to_rmg``.
    """
    symbols, coords = geo
    is_h2 = len(symbols) == 2 and all(sym == 'H' for sym in symbols)

    if not is_h2:
        pybel_mol = geo_to_pybel_mol(geo)
        return pybel_to_rmg(pybel_mol)

    atomic_numbers = np.array([atomic_symbol_dict[sym] for sym in symbols])
    h2 = molecule.Molecule()
    h2.fromXYZ(atomic_numbers, coords)
    return h2
Exemplo n.º 4
0
def _openbabel_translator(input_object, identifier_type, mol=None):
    """
    Translate between string identifiers and :class:`Molecule` objects with OpenBabel.

    The direction is chosen from the type of ``input_object``:

    * a `str` is read as an identifier of kind ``identifier_type`` and a
      :class:`Molecule` is returned;
    * a :class:`Molecule` is written out as an identifier of kind
      ``identifier_type`` and the stripped string is returned.

    Args:
        input_object: either molecule or string identifier
        identifier_type: format of string identifier
            'inchi'    -> InChI
            'inchikey' -> InChI Key
            'smi'      -> SMILES
        mol: molecule object for output (optional); only used when reading a
            string, a new :class:`Molecule` is created when omitted

    Raises:
        ValueError: ``identifier_type`` is not one of the types above when
            writing, or ``input_object`` is neither a `str` nor a
            :class:`Molecule`
    """
    converter = openbabel.OBConversion()

    if isinstance(input_object, str):
        # Reading: string identifier -> Molecule.
        # identifier_type is passed straight through as the OpenBabel input format.
        converter.SetInFormat(identifier_type)
        parsed = openbabel.OBMol()
        converter.ReadString(parsed, input_object)
        parsed.AddHydrogens()
        # In OpenBabel 3+ the function obmol.AssignSpinMultiplicity(True) does nothing.
        # We could write our own method here and call obatom.SetSpinMultiplicity on
        # each atom, but instead we will leave them blank for now and fix them
        # in the from_ob_mol() method.
        target = mm.Molecule() if mol is None else mol
        return from_ob_mol(target, parsed)

    if not isinstance(input_object, mm.Molecule):
        raise ValueError('Unexpected input format. Should be a Molecule or a string.')

    # Writing: Molecule -> string identifier.
    # Pick the OpenBabel output format and the options to set for it.
    if identifier_type == 'inchi':
        out_format, options = 'inchi', ('w',)
    elif identifier_type == 'inchikey':
        out_format, options = 'inchi', ('w', 'K')
    elif identifier_type == 'smi':
        # 'i' turns off isomer and stereochemistry information
        out_format, options = 'can', ('i',)
    else:
        raise ValueError('Unexpected identifier type {0}.'.format(identifier_type))

    converter.SetOutFormat(out_format)
    for option in options:
        converter.AddOption(option)

    ob_form = to_ob_mol(input_object)
    return converter.WriteString(ob_form).strip()
Exemplo n.º 5
0
    def to_rmg_mol(self):
        """
        Return the RMG molecule for this object, building it on first use.

        If ``self.rmg_mol`` is already set it is returned unchanged. Otherwise
        one RMG atom is created per atom yielded by iterating ``self`` (taking
        its ``symbol`` and ``coords``), one RMG bond is created per connection
        from ``self.get_all_connections()``, and the assembled molecule is
        stored on ``self.rmg_mol`` before being returned.
        """
        cached = self.rmg_mol
        if cached is not None:
            return cached

        # Imported here rather than at module level, as in the original code
        import rmgpy.molecule.molecule as rmg_molecule

        rmg_atoms = []
        for atom in self:
            rmg_atoms.append(rmg_molecule.Atom(element=atom.symbol, coords=atom.coords))

        # Pair each of self.atoms with the RMG atom at the same position
        to_rmg = dict(zip(self.atoms, rmg_atoms))

        # Build every bond before the molecule is created
        bonds = []
        for connection in self.get_all_connections():
            first = to_rmg[connection.atom1]
            second = to_rmg[connection.atom2]
            bonds.append(rmg_molecule.Bond(first, second))

        built = rmg_molecule.Molecule(atoms=rmg_atoms)
        for bond in bonds:
            built.add_bond(bond)

        self.rmg_mol = built
        return built
Exemplo n.º 6
0
Arquivo: mol.py Projeto: cgrambow/bac
def pybel_to_rmg(pybelmol, addh=False):
    """
    Convert a pybel molecule into a molecule containing only plain bonds.

    Charge, multiplicity, and bond orders are ignored: only each atom's
    element and coordinates and the bond connectivity are copied, and the
    result of ``toSingleBonds()`` is returned.

    Args:
        pybelmol: pybel molecule to convert
        addh: if true, call ``pybelmol.addh()`` before converting (this
            modifies ``pybelmol`` itself)
    """
    converted = molecule.Molecule()
    if addh:
        pybelmol.addh()

    # Atoms are appended in pybel iteration order
    for pybelatom in pybelmol:
        converted.vertices.append(
            molecule.Atom(
                element=elements.getElement(pybelatom.atomicnum),
                coords=np.array(pybelatom.coords),
            )
        )

    for obbond in pybel.ob.OBMolBondIter(pybelmol.OBMol):
        # Subtract 1 to turn the Open Babel atom index into a position in vertices
        begin_atom = converted.vertices[obbond.GetBeginAtomIdx() - 1]
        end_atom = converted.vertices[obbond.GetEndAtomIdx() - 1]
        converted.addBond(molecule.Bond(begin_atom, end_atom))

    return converted.toSingleBonds()
Exemplo n.º 7
0
Arquivo: mol.py Projeto: cgrambow/bac
def geo_to_mol(geo):
    """
    Convert a geometry, given as a (symbols, coords) pair, into a molecule.

    Open Babel handles most cases because it's better at recognizing long
    bonds. A geometry of exactly two hydrogens is handled by RMG instead,
    because Open Babel can't do it for mysterious reasons.
    """
    symbols, coords = geo
    is_h2 = len(symbols) == 2 and all(sym == 'H' for sym in symbols)

    if is_h2:
        atomic_numbers = np.array([atomic_symbol_dict[sym] for sym in symbols])
        h2 = molecule.Molecule()
        h2.fromXYZ(atomic_numbers, coords)
        return h2

    # Build XYZ text: atom count, an empty comment line, then one line per atom
    header = '{}\n\n'.format(len(symbols))
    atom_lines = []
    for sym, position in zip(*geo):
        atom_lines.append('{0}  {1[0]: .10f}  {1[1]: .10f}  {1[2]: .10f}'.format(sym, position))
    xyz_text = header + '\n'.join(atom_lines)

    pybel_mol = pybel.readstring('xyz', xyz_text)
    return pybel_to_rmg(pybel_mol)