示例#1
0
    def get_multiring_atoms_bonds(self, rdk_mol: Mol, smiles):
        '''
        Count the atoms and bonds that belong to more than one ring.

        Marked "Not used" by the original author.

        The counts are first derived from the ring membership reported by
        ``rdk_mol.GetRingInfo()``. If the number of rings reported there
        differs from the length of ``sssr`` on the Open Babel molecule parsed
        from ``smiles``, a warning is printed and the counts are replaced:
        atoms become the number of ``[R2]`` SMARTS matches found by Open
        Babel, and bonds become that atom count minus one.

        :param rdk_mol: molecule providing GetNumAtoms/GetNumBonds/GetRingInfo
        :param smiles: SMILES string handed to ``pybel.readstring``
        :return: tuple ``(n_atoms_multiring, n_bonds_multiring)``
        '''
        atom_hits = [0] * rdk_mol.GetNumAtoms()
        bond_hits = [0] * rdk_mol.GetNumBonds()

        # TODO GetRingInfo gives SymmetricSSSR, not TRUE SSSR
        ring_info = rdk_mol.GetRingInfo()
        # Tally, per atom and per bond, how many rings contain it.
        for ring_atoms in ring_info.AtomRings():
            for atom_idx in ring_atoms:
                atom_hits[atom_idx] += 1
        for ring_bonds in ring_info.BondRings():
            for bond_idx in ring_bonds:
                bond_hits[bond_idx] += 1

        n_atoms_multiring = sum(1 for hits in atom_hits if hits > 1)
        n_bonds_multiring = sum(1 for hits in bond_hits if hits > 1)

        ob_mol = pybel.readstring('smi', smiles)
        if ri_count_differs := (ring_info.NumRings() != len(ob_mol.sssr)):
            print(
                'WARNING: SymmetricSSSR not equal to TRUE SSSR in rdkit. Use Openbabel instead:',
                smiles)
            # Fall back to Open Babel: atoms matching [R2], bonds = atoms - 1.
            n_atoms_multiring = len(pybel.Smarts('[R2]').findall(ob_mol))
            n_bonds_multiring = n_atoms_multiring - 1

        return n_atoms_multiring, n_bonds_multiring
示例#2
0
def find_rings(mol: Mol) -> List[List[int]]:
    """Return the rings of ``mol`` expressed as atom-map numbers.

    Each ring reported by ``mol.GetRingInfo().AtomRings()`` is a sequence of
    atom positions; every position is translated to the value of
    ``GetAtomMapNum()`` for the atom found at that position when iterating
    ``mol.GetAtoms()``.

    Args:
        mol: Molecule exposing ``GetRingInfo()`` and ``GetAtoms()``.

    Returns:
        One list per ring, in the order the rings are reported, each holding
        the map numbers of the ring's atoms in ring order.
    """
    atom_rings = mol.GetRingInfo().AtomRings()

    # Position in the atom iteration -> atom-map number.
    map_num_at = {
        position: atom.GetAtomMapNum()
        for position, atom in enumerate(mol.GetAtoms())
    }

    return [[map_num_at[position] for position in ring]
            for ring in atom_rings]
示例#3
0
def construct_mol_features(mol: rdchem.Mol,
                           out_size: Optional[int] = -1) -> np.ndarray:
    """Returns the atom features of all the atoms in the molecule.

    Gasteiger charges and stereochemistry are computed on ``mol`` in place
    (stored under the ``_GasteigerCharge`` and ``_CIPCode`` atom properties)
    before the per-atom features are assembled.

    Per atom, the features are concatenated in this order: element symbol,
    degree, hybridization, implicit valence, formal charge (each encoded via
    ``one_hot``), Gasteiger charge (NaN replaced by 0), aromaticity,
    membership in rings of size 3 to 8, total hydrogen count (``one_hot``),
    CIP code R/S, ``_ChiralityPossible`` flag, hydrogen-bond donor,
    hydrogen-bond acceptor, acidic and basic flags.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest.

    out_size: int, optional, default=-1
        The size of the returned array. If this option is negative or
        ``None``, it does not take any effect. Otherwise, it must be larger
        than or equal to the number of atoms in the input molecule. If so,
        the end of the array is padded with zeros.

    Returns:
    --------
    mol_feats: np.ndarray, shape=(n,m)
        Where `n` is the total number of atoms within the molecule (or
        `out_size` when padding is requested), and `m` is the number of
        feats.

    Raises:
    -------
    ValueError
        If `out_size` is non-negative but smaller than the number of atoms.
    IndexError
        If padding is requested for a molecule without atoms, because the
        feature width is taken from the first atom's feature list.
    """
    # Calculate charges and chirality of atoms within molecule
    rdPartialCharges.ComputeGasteigerCharges(
        mol)  # stored under _GasteigerCharge
    rdmolops.AssignStereochemistry(
        mol)  # stored under _CIPCode, see doc for more info

    # SMARTS patterns used to flag atoms by pharmacophoric role
    hydrogen_donor = rdmolfiles.MolFromSmarts(
        "[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0])" + ",n&H1&+0]")
    hydrogen_acceptor = rdmolfiles.MolFromSmarts(
        "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])])" +
        ",$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),n&H0&+0," +
        "$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]")
    acidic = rdmolfiles.MolFromSmarts("[$([C,S](=[O,S,P])-[O;H1,-1])]")
    basic = rdmolfiles.MolFromSmarts(
        "[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))])" +
        ",$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))])," +
        "$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]")

    def _matched_atoms(pattern):
        # Flatten all matches into a set: linear to build, O(1) to query
        # (the previous tuple concatenation via sum() was quadratic).
        return {
            idx
            for match in mol.GetSubstructMatches(pattern) for idx in match
        }

    hydrogen_donor_match = _matched_atoms(hydrogen_donor)
    hydrogen_acceptor_match = _matched_atoms(hydrogen_acceptor)
    acidic_match = _matched_atoms(acidic)
    basic_match = _matched_atoms(basic)

    # Get ring info
    ring = mol.GetRingInfo()

    # Loop-invariant vocabularies for the one-hot encodings
    symbols = [
        'C', 'O', 'N', 'S', 'Cl', 'F', 'Br', 'P', 'I', 'Si', 'B', 'Na',
        'Sn', 'Se', 'other'
    ]
    hybridizations = list(rdchem.HybridizationType.names.values())

    mol_feats = []
    n_atoms = mol.GetNumAtoms()
    for atom_idx in range(n_atoms):
        atom = mol.GetAtomWithIdx(atom_idx)
        chirality_possible = atom.HasProp("_ChiralityPossible")

        atom_feats = []
        atom_feats += one_hot(atom.GetSymbol(), symbols)
        atom_feats += one_hot(atom.GetDegree(), [1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetHybridization(), hybridizations)
        atom_feats += one_hot(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetFormalCharge(), [-3, -2, -1, 0, 1, 2, 3])
        g_charge = float(atom.GetProp("_GasteigerCharge"))
        atom_feats += [g_charge] if not np.isnan(g_charge) else [0.]
        atom_feats += [atom.GetIsAromatic()]

        atom_feats += [
            ring.IsAtomInRingOfSize(atom_idx, size) for size in range(3, 9)
        ]
        atom_feats += one_hot(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])

        # Chirality. Atoms without an assigned CIP code get an all-False
        # R/S encoding. `Exception` (rather than a bare except) keeps the
        # best-effort fallback without swallowing KeyboardInterrupt or
        # SystemExit.
        try:
            cip_feats = one_hot(atom.GetProp('_CIPCode'), ["R", "S"])
        except Exception:
            cip_feats = [False, False]
        atom_feats += cip_feats + [chirality_possible]
        # Hydrogen bonding
        atom_feats += [atom_idx in hydrogen_donor_match]
        atom_feats += [atom_idx in hydrogen_acceptor_match]
        # Is Acidic/Basic
        atom_feats += [atom_idx in acidic_match]
        atom_feats += [atom_idx in basic_match]

        mol_feats.append(atom_feats)

    # `np.float` was only an alias of the builtin float and has been removed
    # from NumPy; np.float64 is the dtype it resolved to.
    if out_size is None or out_size < 0:
        return np.array(mol_feats, dtype=np.float64)
    if out_size < n_atoms:
        raise ValueError(
            '`out_size` (N={}) must be negative or larger than or '
            'equal to the number of atoms in the input molecules (N={}).'.
            format(out_size, n_atoms))

    # 'empty' padding for `mol_feats`. Generate(s) feature matrix of same size for all mols
    # NOTE: len(mol_feats[0]) is the number of feats
    padded_mol_feats = np.zeros((out_size, len(mol_feats[0])),
                                dtype=np.float64)
    padded_mol_feats[:n_atoms] = np.array(mol_feats, dtype=np.float64)
    return padded_mol_feats