def get_atom_proplist(mol,
                      sa_dict=None,
                      aids=None,
                      base_prop=None,
                      hash_type='str'):
    """
    Accumulate atomic properties over a set of atoms in mol (indexed by aids).
    Parameters:
        mol - a rdkit.Chem.rdchem.Mol molecule
        sa_dict - a dictionary mapping atom indices to their solid angles; it
                  is only read when some entry of base_prop contains
                  'SolidAngle'. A None value for an atom counts as 0, and so
                  does every atom when sa_dict itself is None.
        aids - the indices of atoms (default: no atoms)
        base_prop - the names of the properties to compute
                    (default: ['AtomicMass']). Recognised names:
                    'AtomicMass', 'TotalConnections', 'HCount',
                    'HeavyNeighborCount', 'FormalCharge', 'DeltaMass',
                    'SolidAngle', 'SolidAngleValue', 'SolidAngleSign'.
        hash_type - type for hash the properties, can be 'str' or 'vec'
    Returns the computed property list. The values always follow the fixed
    order of the names listed above, not the order used in base_prop.
        'str' - every numeric property is the sum over aids divided by
                len(aids), formatted as '%.2f'; 'SolidAngleSign' is kept as
                '+', '-' or '0'.
        'vec' - the raw sums over aids (NOT divided by len(aids)), plus the
                'SolidAngleSign' string when requested.
    An empty aids yields []. For any other hash_type 'Wrong hash type!' is
    printed and the complete accumulator dictionary is returned instead.
    """
    if aids is None:
        aids = []
    if base_prop is None:
        base_prop = ['AtomicMass']
    # len() rather than truthiness so that array-like index containers work.
    if len(aids) == 0:
        return []

    # The requested properties do not change per atom: resolve them once.
    want_mass = 'AtomicMass' in base_prop
    want_connections = 'TotalConnections' in base_prop
    want_h_count = 'HCount' in base_prop
    want_heavy_neighbors = 'HeavyNeighborCount' in base_prop
    want_charge = 'FormalCharge' in base_prop
    want_delta_mass = 'DeltaMass' in base_prop
    # Substring test on purpose: any 'SolidAngle*' entry needs the raw sum.
    want_solid_angle = any('SolidAngle' in p for p in base_prop)

    # The periodic table is only needed for the DeltaMass reference weight.
    tbl = GetPeriodicTable() if want_delta_mass else None

    totals = {
        'AtomicMass': 0,
        'TotalConnections': 0,
        'HCount': 0,
        'HeavyNeighborCount': 0,
        'FormalCharge': 0,
        'DeltaMass': 0,
        'SolidAngle': 0,
        'SolidAngleValue': 0,
        'SolidAngleSign': ''
    }

    # Sum every requested property over the selected atoms.
    for aid in aids:
        atom = mol.GetAtomWithIdx(aid)
        if want_mass:
            totals['AtomicMass'] += atom.GetMass()
        if want_connections:
            totals['TotalConnections'] += atom.GetDegree()
        if want_h_count:
            totals['HCount'] += atom.GetNumExplicitHs()
        if want_heavy_neighbors:
            # Neighbours with atomic number > 1, i.e. everything but hydrogen.
            totals['HeavyNeighborCount'] += sum(
                1 for bond in atom.GetBonds()
                if bond.GetOtherAtom(atom).GetAtomicNum() > 1)
        if want_charge:
            totals['FormalCharge'] += atom.GetFormalCharge()
        if want_delta_mass:
            totals['DeltaMass'] += (
                atom.GetMass() - tbl.GetAtomicWeight(atom.GetAtomicNum()))
        if want_solid_angle:
            sa = sa_dict[aid] if sa_dict is not None else None
            totals['SolidAngle'] += 0 if sa is None else sa

    # Magnitude and sign are derived from the summed solid angle.
    if 'SolidAngleValue' in base_prop:
        totals['SolidAngleValue'] = abs(totals['SolidAngle'])
    if 'SolidAngleSign' in base_prop:
        ref = totals['SolidAngle']
        totals['SolidAngleSign'] = '0' if ref == 0 else (
            '+' if ref > 0 else '-')

    # Keep only what was asked for, in the fixed order of `totals`.
    selected = {k: v for (k, v) in totals.items() if k in base_prop}
    if hash_type == 'str':
        n_atoms = len(aids)
        for key in selected:
            # The sign is already a string and must not be averaged.
            if key != 'SolidAngleSign':
                selected[key] = '%.2f' % (selected[key] / n_atoms)
    elif hash_type != 'vec':
        print('Wrong hash type!')
        return totals

    return list(selected.values())
# Example #2
# 0
def getOriginalIdentifiers(mol,
                           prop=None,
                           sa_dict=None,
                           includeAtoms=None,
                           radius=2,
                           hash_type='str',
                           idf_power=64):
    """Compute the original identifiers for atoms in a molecule based on atomic properties.
       Note it only includes HEAVY atoms (atomic number > 1).
    Parameters:
        mol - rdkit.Chem.rdchem.Mol molecule
        prop - atomic property list; None (the default) selects every property
               listed below. Properties are emitted in the order listed here,
               not in the order given in prop.
               'AtomicNumber': the atomic number of atom
               'AtomicMass': the mass of atom
               'TotalConnections': the degree of the atom, atom.GetDegree()
               'HCount': the value of atom.GetNumExplicitHs()
                         NOTE(review): earlier documentation described this as
                         implicit + explicit hydrogens - confirm which is meant
               'HeavyNeighborCount': the number of heavy (non-hydrogen) neighbor atoms
               'FormalCharge': the formal charge of atom
               'DeltaMass': the difference between atomic mass and atomic weight (weighted average of atomic masses)
               'IsTerminalAtom': 1 if the atom has degree 1, otherwise 0
               'SolidAngle': the solid angle of the atom on the molecule surface (> 0: convex, < 0: concave)
               'SolidAngleValue': the absolute solid angle of the atom on the molecule surface
               'SolidAngleSign': the sign of solid angle of the atom ('+', '-' or '0')
               Only ONE solid-angle entry is emitted per atom: 'SolidAngle'
               if requested, else 'SolidAngleValue' if requested, else the sign.
        sa_dict - a dictionary mapping atom indices to their solid angles; a
                  None value counts as 0, and so does every atom when sa_dict
                  itself is None. Only read when prop has a 'SolidAngle*' entry.
        includeAtoms - atom indices for getting identifiers (default: all atoms)
        radius - ECFP radius, only calculates the identifiers of atoms in the neighborhoods (of radius) of included atoms (includeAtoms)
        hash_type - type for hash the properties, can be 'str' or 'vec'
        idf_power - power passed to hash_ecfp for the 'str' hash type
    Returns a dictionary mapping (atom index, 0) to the identifier of that
    heavy atom: the result of hash_ecfp() on the comma-joined properties for
    'str', or the built-in hash() of the property tuple for 'vec'. For any
    other hash_type 'Wrong hash type!' is printed and {} is returned.
    """
    if prop is None:
        prop = [
            'AtomicNumber', 'AtomicMass', 'TotalConnections', 'HCount',
            'HeavyNeighborCount', 'FormalCharge', 'DeltaMass',
            'IsTerminalAtom', 'SolidAngle', 'SolidAngleValue',
            'SolidAngleSign'
        ]
    # Reject an unknown hash type once, before doing any per-atom work.
    if hash_type not in ('str', 'vec'):
        print('Wrong hash type!')
        return {}
    as_vec = hash_type == 'vec'

    # The requested properties do not change per atom: resolve them once.
    want_atomic_number = 'AtomicNumber' in prop
    want_mass = 'AtomicMass' in prop
    want_connections = 'TotalConnections' in prop
    want_h_count = 'HCount' in prop
    want_heavy_neighbors = 'HeavyNeighborCount' in prop
    want_charge = 'FormalCharge' in prop
    want_delta_mass = 'DeltaMass' in prop
    want_terminal = 'IsTerminalAtom' in prop
    # Substring test on purpose: matches every 'SolidAngle*' entry.
    want_any_solid_angle = any('SolidAngle' in p for p in prop)
    want_solid_angle = 'SolidAngle' in prop
    want_solid_angle_value = 'SolidAngleValue' in prop

    # The periodic table is only needed for the DeltaMass reference weight.
    tbl = GetPeriodicTable() if want_delta_mass else None

    idf_dict = {}
    if includeAtoms is None:
        indices = range(mol.GetNumAtoms())
    else:
        indices = includeAtoms

    for i in indices:
        index = int(i)
        # The environment is a collection of bond indices; the atoms of
        # interest are the end points of those bonds.
        # NOTE(review): when the environment comes back empty no identifier is
        # produced for this pass, not even for atom `index` - confirm intended.
        env = list(
            Chem.FindAtomEnvironmentOfRadiusN(mol, radius, index, useHs=True))
        env_aids = set(
            [mol.GetBondWithIdx(bid).GetBeginAtomIdx() for bid in env] +
            [mol.GetBondWithIdx(bid).GetEndAtomIdx() for bid in env])
        for aid in env_aids:
            if (aid, 0) in idf_dict:
                continue  # already computed via another environment
            atom = mol.GetAtomWithIdx(aid)
            if atom.GetAtomicNum() <= 1:
                continue  # hydrogens get no identifier

            properties = []
            if want_atomic_number:
                properties.append(atom.GetAtomicNum())
            if want_mass:
                mass = atom.GetMass()
                properties.append(mass if as_vec else '%.2f' % mass)
            if want_connections:
                properties.append(atom.GetDegree())
            if want_h_count:
                properties.append(atom.GetNumExplicitHs())
            if want_heavy_neighbors:
                properties.append(
                    sum(1 for bond in atom.GetBonds()
                        if bond.GetOtherAtom(atom).GetAtomicNum() > 1))
            if want_charge:
                charge = atom.GetFormalCharge()
                properties.append(charge if as_vec else '%.2f' % charge)
            if want_delta_mass:
                delta = atom.GetMass() - tbl.GetAtomicWeight(
                    atom.GetAtomicNum())
                # Fix: the value was computed but append() was called without it.
                properties.append(delta if as_vec else '%.2f' % delta)
            if want_terminal:
                properties.append(1 if atom.GetDegree() == 1 else 0)
            if want_any_solid_angle:
                sa = sa_dict[aid] if sa_dict is not None else None
                solang = 0 if sa is None else sa
                # Priority selection: full angle, else magnitude, else sign.
                if want_solid_angle:
                    properties.append(solang if as_vec else '%.2f' % solang)
                elif want_solid_angle_value:
                    properties.append(
                        abs(solang) if as_vec else '%.2f' % abs(solang))
                else:
                    properties.append('0' if solang == 0 else (
                        '+' if solang > 0 else '-'))

            if as_vec:
                idf = hash(tuple(properties))
            else:
                idf = hash_ecfp(ecfp=','.join(str(p) for p in properties),
                                power=idf_power)

            idf_dict[(aid, 0)] = idf

    return idf_dict
# Example #3
# 0
def mol_to_dgl(mol):
    """Convert an rdkit mol object into a DGL graph carrying atom and bond features.

    The graph has one node per atom and one edge per bond, added from the
    bond's begin atom to its end atom. Node features are stored under
    ``ndata["feat"]`` and edge features under ``edata["feat"]``, both as
    ``torch.FloatTensor``.

    Node feature layout, in order: one-hot atom type (ATOM_TYPES), one-hot
    chiral tag (CHIRALITY), explicit valence, formal charge, one-hot
    hybridization (HYBRIDIZATION), degree, implicit valence, aromatic flag,
    explicit H count, implicit H count, radical electron count, in-ring flag,
    atomic weight, van der Waals radius.

    Edge feature layout, in order: one-hot bond type (BOND_TYPES), one-hot
    bond stereo (BOND_STEREO), conjugation flag, in-ring flag.

    Parameters
    ----------
    mol : rdkit mol

    Returns
    -------
    dgl.graph

    Raises
    ------
    ValueError
        If an atom or bond attribute is missing from its lookup list
        (raised by ``list.index``).
    """

    def one_hot(choices, value):
        # 0/1 vector with a single 1 at the position of `value` in `choices`.
        encoded = [0] * len(choices)
        encoded[choices.index(value)] = 1
        return encoded

    graph = dgl.DGLGraph()
    graph.add_nodes(mol.GetNumAtoms())
    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.set_e_initializer(dgl.init.zero_initializer)

    periodic_table = GetPeriodicTable()

    # Node features: one row per atom, in atom iteration order.
    # Gasteiger partial charges are not computed or included here.
    node_rows = []
    for atom in mol.GetAtoms():
        symbol = atom.GetSymbol()
        row = one_hot(ATOM_TYPES, symbol)
        row += one_hot(CHIRALITY, atom.GetChiralTag())
        row += [atom.GetExplicitValence(), atom.GetFormalCharge()]
        row += one_hot(HYBRIDIZATION, atom.GetHybridization())
        row += [
            atom.GetDegree(),
            atom.GetImplicitValence(),
            int(atom.GetIsAromatic()),
            atom.GetNumExplicitHs(),
            atom.GetNumImplicitHs(),
            atom.GetNumRadicalElectrons(),
            int(atom.IsInRing()),
            periodic_table.GetAtomicWeight(symbol),
            periodic_table.GetRvdw(symbol),
        ]
        node_rows.append(row)

    # Edges and their features are produced in the same pass so that row k of
    # the edge feature matrix describes the k-th edge added to the graph.
    edge_rows = []
    for bond in mol.GetBonds():
        graph.add_edge(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        edge_row = one_hot(BOND_TYPES, bond.GetBondType())
        edge_row += one_hot(BOND_STEREO, bond.GetStereo())
        edge_row += [float(bond.GetIsConjugated()), float(bond.IsInRing())]
        edge_rows.append(edge_row)

    graph.ndata["feat"] = torch.FloatTensor(node_rows)
    graph.edata["feat"] = torch.FloatTensor(edge_rows)
    return graph