Exemplo n.º 1
0
def create_feature_matrix(mol,
                          atom_num_limit,
                          use_electronegativity=False,
                          use_gaff=False,
                          use_sybyl=False,
                          use_gasteiger=False,
                          use_tfrecords=False,
                          degree_dim=17,
                          en_list=None):
    """Build the per-atom feature rows for ``mol``.

    With ``use_gaff`` the rows are whatever ``mol_gaff_features(mol)``
    returns. Otherwise one row per atom is produced by ``atom_features``;
    the molecule is sanitized first when SYBYL types or Gasteiger charges
    are requested, and Gasteiger charges are computed on it when
    ``use_gasteiger`` is set.

    Unless ``use_tfrecords`` is set, all-zero ``np.int8`` rows with the same
    length as the first row are appended until there are ``atom_num_limit``
    rows. Nothing is truncated when there are already more rows than that.

    Returns the (possibly padded) collection of feature rows.
    """
    if use_gaff:
        rows = mol_gaff_features(mol)
    else:
        needs_sanitize = use_sybyl or use_gasteiger
        if needs_sanitize:
            Chem.SanitizeMol(mol)
        if use_gasteiger:
            ComputeGasteigerCharges(mol)
        rows = []
        for atom in mol.GetAtoms():
            rows.append(
                atom_features(atom,
                              en_list=en_list,
                              use_sybyl=use_sybyl,
                              use_electronegativity=use_electronegativity,
                              use_gasteiger=use_gasteiger,
                              degree_dim=degree_dim))

    # TFRecords output is written unpadded.
    if use_tfrecords:
        return rows

    n_padding = atom_num_limit - len(rows)
    for _ in range(n_padding):
        # Padding rows copy the width of the first real row.
        rows.append(np.zeros(len(rows[0]), dtype=np.int8))
    return rows
Exemplo n.º 2
0
def fitGasteigerCharges(mol, atom_types=None):
    """
    Compute Gasteiger atomic charges and return them on a copy of the molecule

    Parameters
    ----------
    mol: Molecule
        Single-frame molecule to fit the charges for. It is not modified.
    atom_types: optional
        If given, assigned to the ``atomtype`` field of the working copy
        before that copy is converted with ``SmallMol``.

    Return
    ------
    results: Molecule
        Copy of the molecule with the charges set

    Raises
    ------
    TypeError
        If ``mol`` is not a ``Molecule`` instance.
    ValueError
        If ``mol`` does not have exactly one frame.

    Examples
    --------
    >>> from parameterize.home import home
    >>> from moleculekit.molecule import Molecule
    >>> molFile = os.path.join(home('test-charge'), 'H2O.mol2')
    >>> mol = Molecule(molFile)
    >>> mol.charge[:] = 0

    >>> new_mol = fitGasteigerCharges(mol)
    >>> assert new_mol is not mol
    >>> new_mol.charge # doctest: +ELLIPSIS
    array([-0.411509...,  0.205754...,  0.205754...], dtype=float32)

    >>> new_mol = fitGasteigerCharges(mol, atom_types=mol.atomtype)
    >>> assert new_mol is not mol
    >>> new_mol.charge # doctest: +ELLIPSIS
    array([-0.411509...,  0.205754...,  0.205754...], dtype=float32)
    """

    from moleculekit.smallmol.smallmol import SmallMol
    from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges

    if not isinstance(mol, Molecule):
        raise TypeError('"mol" has to be instance of {}'.format(Molecule))
    n_frames = mol.numFrames
    if n_frames != 1:
        raise ValueError(
            '"mol" can have just one frame, but it has {}'.format(n_frames)
        )

    # The atom type override (if any) is applied to a scratch copy only,
    # so neither the input nor the returned molecule carries it.
    scratch = mol.copy()
    if atom_types is not None:
        scratch.atomtype = atom_types

    # Convert to an RDKit molecule without touching the hydrogens.
    small_mol = SmallMol(
        scratch, fixHs=False, removeHs=False, verbose=False, force_reading=True
    )
    rdkit_mol = small_mol._mol
    ComputeGasteigerCharges(rdkit_mol, throwOnParamFailure=True)

    # Copy the per-atom charges, in RDKit atom order, onto a fresh copy.
    charged = mol.copy()
    charges = [
        atom.GetDoubleProp("_GasteigerCharge") for atom in rdkit_mol.GetAtoms()
    ]
    charged.charge[:] = charges

    return charged
def mol_to_nx(mol) -> nx.Graph:
    """Convert an RDKit molecule into an undirected ``networkx`` graph.

    The molecule is sanitized in place (all sanitize flags XOR-ed with
    ``SANITIZE_PROPERTIES``) and Gasteiger charges are computed on it.
    Each atom becomes a node keyed by its atom index and carrying its
    position in the molecule's conformer plus per-atom descriptors; each
    bond becomes an edge carrying its bond type and conjugation flag.

    Returns the populated graph.
    """
    graph = nx.Graph()
    conformer = mol.GetConformer()

    sanitize_ops = (SanitizeFlags.SANITIZE_ALL
                    ^ SanitizeFlags.SANITIZE_PROPERTIES)
    SanitizeMol(mol, sanitize_ops)

    ComputeGasteigerCharges(mol)
    rings = mol.GetRingInfo()
    crippen = rdMolDescriptors._CalcCrippenContribs(mol)
    tpsa = rdMolDescriptors._CalcTPSAContribs(mol)

    for atom in mol.GetAtoms():
        idx = atom.GetIdx()
        node_attrs = {
            "pos": conformer.GetAtomPosition(idx),
            "formal_charge": atom.GetFormalCharge(),
            "chiral_tag": atom.GetChiralTag(),
            "hybridization": atom.GetHybridization(),
            "is_aromatic": atom.GetIsAromatic(),
            "num_atom_rings": rings.NumAtomRings(idx),
            "is_in_ring_size3": atom.IsInRingSize(3),
            "is_in_ring_size4": atom.IsInRingSize(4),
            "is_in_ring_size5": atom.IsInRingSize(5),
            "is_in_ring_size6": atom.IsInRingSize(6),
            "symbol": atom.GetSymbol(),
            "total_valence": atom.GetTotalValence(),
            # NOTE(review): GetProp returns the charge as a string, not a
            # float; kept as-is because callers may rely on it.
            "gasteiger_charge": atom.GetProp('_GasteigerCharge'),
            "num_implicit_hs": atom.GetNumImplicitHs(),
            "total_degree": atom.GetTotalDegree(),
            # Each Crippen entry is indexed as (logP, MR).
            "crippen_logp": crippen[idx][0],
            "crippen_mr": crippen[idx][1],
            "tpsa": tpsa[idx],
        }
        graph.add_node(idx, **node_attrs)

    for bond in mol.GetBonds():
        begin = bond.GetBeginAtomIdx()
        end = bond.GetEndAtomIdx()
        graph.add_edge(begin,
                       end,
                       bond_type=bond.GetBondType(),
                       is_conjugated=bond.GetIsConjugated())

    return graph
Exemplo n.º 4
0
def fitGasteigerCharges(mol):
    """
    Fit Gasteiger atomic charges

    The molecule is written to a temporary MOL2 file (with atom types
    replaced by element symbols), read back with RDKit, and the Gasteiger
    charges computed by RDKit are stored on a copy of the input.

    Parameters
    ----------
    mol: Molecule
        Single-frame molecule to fit the charges for. It is not modified.

    Return
    ------
    results: Molecule
        Copy of the molecule with the charges set

    Raises
    ------
    TypeError
        If ``mol`` is not a ``Molecule`` instance.
    ValueError
        If ``mol`` does not have exactly one frame.
    RuntimeError
        If RDKit cannot parse the temporary MOL2 file.

    Examples
    --------
    >>> from htmd.home import home
    >>> from htmd.molecule.molecule import Molecule
    >>> molFile = os.path.join(home('test-charge'), 'H2O.mol2')
    >>> mol = Molecule(molFile)
    >>> mol.charge[:] = 0

    >>> new_mol = fitGasteigerCharges(mol)
    >>> assert new_mol is not mol
    >>> new_mol.charge # doctest: +ELLIPSIS
    array([-0.411509...,  0.205754...,  0.205754...], dtype=float32)
    """

    from rdkit.Chem.rdmolfiles import MolFromMol2File
    from rdkit.Chem.rdPartialCharges import ComputeGasteigerCharges

    if not isinstance(mol, Molecule):
        raise TypeError('"mol" has to be instance of {}'.format(Molecule))
    if mol.numFrames != 1:
        raise ValueError('"mol" can have just one frame, but it has {}'.format(
            mol.numFrames))

    # Set atom types to elements, otherwise RDKit refuses to read the MOL2 file
    htmd_mol = mol.copy()
    htmd_mol.atomtype = htmd_mol.element

    # Convert Molecule to rdkit Mol through a temporary MOL2 file
    with TemporaryDirectory() as tmpDir:
        filename = os.path.join(tmpDir, 'mol.mol2')
        htmd_mol.write(filename)
        rdkit_mol = MolFromMol2File(filename, removeHs=False)

    # MolFromMol2File signals a parse failure by returning None; fail with a
    # clear message instead of an AttributeError on the next line.
    if rdkit_mol is None:
        raise RuntimeError(
            'RDKit could not read the molecule from the temporary MOL2 file')
    assert mol.numAtoms == rdkit_mol.GetNumAtoms()

    # Compute and store Gasteiger charges
    ComputeGasteigerCharges(rdkit_mol, throwOnParamFailure=True)
    mol = mol.copy()
    mol.charge[:] = [
        atom.GetDoubleProp('_GasteigerCharge')
        for atom in rdkit_mol.GetAtoms()
    ]

    return mol
Exemplo n.º 5
0
    def calculate(self):
        """Return the per-atom values of ``self.prop`` for ``self.mol``.

        Gasteiger charges are computed on the molecule first when the
        property object has a truthy ``gasteiger_charges`` attribute. If any
        resulting value is NaN, ``self.fail`` is called with a ``ValueError``
        naming the element symbols of the affected atoms.
        """
        needs_charges = getattr(self.prop, "gasteiger_charges", False)
        if needs_charges:
            ComputeGasteigerCharges(self.mol)

        values = atoms_to_numpy(self.prop, self.mol)

        missing = np.isnan(values)
        if not np.any(missing):
            return values

        # Collect the distinct element symbols of the atoms without a value.
        symbols = np.array([atom.GetSymbol() for atom in self.mol.GetAtoms()])
        offenders = set(symbols[missing])
        error = ValueError(
            "missing {} for {}".format(self.get_long(), list(offenders)))
        self.fail(error)

        return values
Exemplo n.º 6
0
def create_feature_matrix(mol, args, en_list=None):
    """Build the per-atom feature rows for ``mol``.

    ``args`` supplies the options read here: ``use_sybyl``,
    ``use_gasteiger``, ``use_electronegativity``, ``degree_dim``,
    ``tfrecords`` and ``atom_num_limit``.

    The molecule is sanitized when SYBYL types or Gasteiger charges are
    requested, and Gasteiger charges are computed on it when
    ``args.use_gasteiger`` is set. One row per atom is produced by
    ``atom_features``. Unless ``args.tfrecords`` is set, all-zero integer
    rows with the same length as the first row are appended until there are
    ``args.atom_num_limit`` rows.

    Returns the list of feature rows.
    """
    if args.use_sybyl or args.use_gasteiger:
        Chem.SanitizeMol(mol)
    if args.use_gasteiger:
        ComputeGasteigerCharges(mol)
    feature = [atom_features(atom,
        en_list=en_list,
        use_sybyl=args.use_sybyl,
        use_electronegativity=args.use_electronegativity,
        use_gasteiger=args.use_gasteiger,
        degree_dim=args.degree_dim) for atom in mol.GetAtoms()]
    if not args.tfrecords:
        for _ in range(args.atom_num_limit - len(feature)):
            # `np.int` was only an alias of the builtin `int` and was removed
            # in NumPy 1.24; the builtin gives the same dtype everywhere.
            feature.append(np.zeros(len(feature[0]), dtype=int))
    return feature
Exemplo n.º 7
0
    def add_mol(self, mol, add_atom_types=True):
        """
        Adds molecule to the dataset.

        Computes Gasteiger charges on ``mol`` and, if none of them is NaN,
        appends one entry for this molecule to each of ``self.node_ids``,
        ``self.node_features``, ``self.adj``, ``self.route_features`` and
        ``self.dists`` (and to ``self.node_labels`` when ``self.node_prop``
        is set). Index 0 of every per-node array is an extra "pool" node;
        atom ``i`` of the molecule lives at index ``i + 1``.

        Parameters:
            mol: RDKit molecule to add.
            add_atom_types: when true, ``self.add_atom_type`` is called with
                the atomic number of every atom.

        Returns True if successful, otherwise False (a Gasteiger charge was
        NaN; nothing has been added to the dataset in that case).
        """
        ComputeGasteigerCharges(mol)
        gast = np.array(
            [a.GetDoubleProp("_GasteigerCharge") for a in mol.GetAtoms()])
        if np.isnan(gast).any():
            # Failed to compute partial charges: reject the molecule before
            # any dataset state is touched.
            return False
        # NOTE: the bare string below is a no-op expression statement left in
        # the code; it documents the `add_atom_types` flag.
        """Whether to add atom types."""
        if add_atom_types:
            for a in mol.GetAtoms():
                self.add_atom_type(a.GetAtomicNum())

        # Per-atom labels, read from the atom property named by
        # self.node_prop (no entry for the pool node).
        if self.node_prop is not None:
            if self.node_labels is None:
                self.node_labels = []
            labels = torch.FloatTensor(
                [a.GetDoubleProp(self.node_prop) for a in mol.GetAtoms()])
            self.node_labels.append(labels)

        # Node type ids: atomic number looked up in self.node_types, falling
        # back to self.default_type; the "pool" type id is prepended.
        nodes = np.array([
            self.node_types.get(
                mol.GetAtomWithIdx(i).GetAtomicNum(), self.default_type)
            for i in range(mol.GetNumAtoms())
        ])
        nodes = np.concatenate([[self.node_types["pool"]], nodes])
        self.node_ids.append(torch.LongTensor(nodes))

        # Node feature matrix X, one row per node (row 0 = pool node, left
        # at zero). Column layout:
        #   0:3   onehot(formal charge, -1, 1)
        #   3:8   onehot(hybridization, 0, 4)
        #   8:14  onehot(explicit valence, 0, 5)
        #   14    is aromatic
        #   15-18 in a ring of size 3 / 4 / 5 / 6
        #   19    in any ring
        #   20    Gasteiger charge
        #   21-24 (weave features only) H-acceptor, H-donor, R center, S center
        # `onehot` is defined elsewhere; presumably it one-hot encodes over
        # the inclusive range given by its last two arguments — TODO confirm.
        num_feat = 21
        if self.weave_features:
            num_feat += 4

        X = np.zeros((nodes.shape[0], num_feat), dtype=np.float32)

        fc = np.array([a.GetFormalCharge() for a in mol.GetAtoms()])
        X[1:, 0:3] = onehot(fc, -1, 1)

        hb = np.array([a.GetHybridization() for a in mol.GetAtoms()])
        X[1:, 3:8] = onehot(hb, 0, 4)

        ev = np.array([a.GetExplicitValence() for a in mol.GetAtoms()])
        X[1:, 8:14] = onehot(ev, 0, 5)

        X[1:, 14] = [a.GetIsAromatic() for a in mol.GetAtoms()]

        X[1:, 15] = [a.IsInRingSize(3) for a in mol.GetAtoms()]
        X[1:, 16] = [a.IsInRingSize(4) for a in mol.GetAtoms()]
        X[1:, 17] = [a.IsInRingSize(5) for a in mol.GetAtoms()]
        X[1:, 18] = [a.IsInRingSize(6) for a in mol.GetAtoms()]

        X[1:, 19] = [a.IsInRing() for a in mol.GetAtoms()]

        X[1:, 20] = gast

        # Sanity check only: `gast` was already verified to be NaN-free above.
        assert np.isnan(X[1:,
                          20]).any() == False, "Found NaN in GasteigerCharges"

        # H-bond acceptors and donors: substructure matches of the patterns
        # self.ha / self.hd, each expected to match exactly one atom.
        if self.weave_features:
            mha = mol.GetSubstructMatches(self.ha)
            mhd = mol.GetSubstructMatches(self.hd)
            assert all(len(x) == 1
                       for x in mha), "Hacceptor returned more than one atom"
            assert all(len(x) == 1
                       for x in mhd), "Hdonor returned more than one atom"
            # +1 shifts atom indices past the pool node in row 0.
            ha_idx = [m[0] + 1 for m in mha]
            hd_idx = [m[0] + 1 for m in mhd]
            X[ha_idx, 21] = 1.0
            X[hd_idx, 22] = 1.0

            # Chiral centers: entries are indexed as (atom index, label) and
            # flagged in separate columns for "R" and "S".
            chiral = Chem.FindMolChiralCenters(mol)
            r_idx = [c[0] + 1 for c in chiral if c[1] == "R"]
            s_idx = [c[0] + 1 for c in chiral if c[1] == "S"]
            X[r_idx, 23] = 1.0
            X[s_idx, 24] = 1.0

        self.node_features.append(torch.FloatTensor(X))

        # Route information. Symmetric atom-by-atom matrices (no pool node):
        #   A      any bond
        #   Aconj  conjugated bonds
        #   A1/A2/A3  single / double / triple bonds
        #   Aarom  aromatic bonds
        #   Aflex  bond costs: 1 per bond, 0.99 for non-conjugated single bonds
        A = np.zeros((mol.GetNumAtoms(), mol.GetNumAtoms()), dtype=np.float32)
        Aconj = np.zeros_like(A)
        A1 = np.zeros_like(A)
        A2 = np.zeros_like(A)
        A3 = np.zeros_like(A)
        Aarom = np.zeros_like(A)
        Aflex = np.zeros_like(A)

        for bond in mol.GetBonds():
            i = bond.GetBeginAtomIdx()
            j = bond.GetEndAtomIdx()
            A[i, j] = 1
            A[j, i] = 1
            Aflex[i, j] = 1
            Aflex[j, i] = 1
            if bond.GetIsConjugated():
                Aconj[i, j] = 1
                Aconj[j, i] = 1
            if bond.GetIsAromatic():
                Aarom[i, j] = 1
                Aarom[j, i] = 1
            if bond.GetBondType() == rdkit.Chem.rdchem.BondType.SINGLE:
                A1[i, j] = 1
                A1[j, i] = 1
                if not bond.GetIsConjugated():
                    # Slightly cheaper than 1 so that a path crossing such a
                    # bond ends up with a non-integer total cost (see `rigid`).
                    Aflex[i, j] = 0.99
                    Aflex[j, i] = 0.99
            elif bond.GetBondType() == rdkit.Chem.rdchem.BondType.DOUBLE:
                A2[i, j] = 1
                A2[j, i] = 1
            elif bond.GetBondType() == rdkit.Chem.rdchem.BondType.TRIPLE:
                A3[i, j] = 1
                A3[j, i] = 1

        self.adj.append(torch.FloatTensor(A))

        num_route_features = 16
        if self.bond_types:
            num_route_features += 3

        # `comp_dists` / `min_cost` are defined elsewhere; presumably they
        # return pairwise path lengths / minimum path costs capped at `dmax`
        # — TODO confirm against their definitions.
        dists = comp_dists(A, dmax=13, self_loop=True)
        dists_conj = comp_dists(Aconj, dmax=Aconj.shape[0], self_loop=False)
        dists1 = comp_dists(A1, dmax=13, self_loop=False)
        dists2 = comp_dists(A2, dmax=13, self_loop=False)
        costs_flex = min_cost(Aflex, dmax=13)
        # Off-diagonal pairs whose cost is within 1e-4 of an integer and
        # below 13, i.e. presumably pairs whose cheapest path crosses no
        # 0.99-cost (non-conjugated single) bond.
        rigid = (np.abs(np.round(costs_flex) - costs_flex) <
                 1e-4) & (costs_flex < 13)
        np.fill_diagonal(rigid, False)

        # Route feature tensor, (atoms + 1) x (atoms + 1) x channels; row and
        # column 0 belong to the pool node and stay zero. Channels:
        #   0-4   dists == 0, 1, 2, 3, 4
        #   5-8   dists in 5..6, 7..8, 9..12, >= 13
        #   9     dists_conj <= 4
        #   10    5 <= dists_conj < number of atoms
        #   11    dists1 < 13        12  dists2 < 13
        #   13    triple bond (A3)   14  rigid
        #   15    both atoms in the same ring returned by GetSymmSSSR
        #   16-18 (bond_types only) single, double, aromatic bond
        route = np.zeros((A.shape[0] + 1, A.shape[0] + 1, num_route_features),
                         dtype=np.float32)
        route[1:, 1:, 0] = (dists == 0)
        route[1:, 1:, 1] = (dists == 1)
        route[1:, 1:, 2] = (dists == 2)
        route[1:, 1:, 3] = (dists == 3)
        route[1:, 1:, 4] = (dists == 4)
        route[1:, 1:, 5] = (5 <= dists) & (dists <= 6)
        route[1:, 1:, 6] = (7 <= dists) & (dists <= 8)
        route[1:, 1:, 7] = (9 <= dists) & (dists <= 12)
        route[1:, 1:, 8] = 13 <= dists

        route[1:, 1:, 9] = dists_conj <= 4
        route[1:, 1:,
              10] = (5 <= dists_conj) & (dists_conj < dists_conj.shape[0])
        route[1:, 1:, 11] = dists1 < 13
        route[1:, 1:, 12] = dists2 < 13
        route[1:, 1:, 13] = A3
        route[1:, 1:, 14] = rigid

        # Flag every ordered pair of distinct atoms sharing a ring.
        sssr = rdkit.Chem.rdmolops.GetSymmSSSR(mol)
        for ring in sssr:
            for a0 in ring:
                for a1 in ring:
                    if a0 == a1:
                        continue
                    route[a0 + 1, a1 + 1, 15] = 1

        if self.bond_types:
            route[1:, 1:, 16] = A1
            route[1:, 1:, 17] = A2
            route[1:, 1:, 18] = Aarom

        # TODO: add 1+ queries
        self.route_features.append(torch.FloatTensor(route))

        # Distance matrix including the pool node: every entry involving the
        # pool node is `pool_dist`, the atom-atom block is `dists`.
        # If pool_dist is 0.0 then all heads can talk between pool and nodes.
        # TODO: try pool_dist 999.0
        pool_dist = 0.0
        D = np.zeros(
            (A.shape[0] + 1, A.shape[0] + 1), dtype=np.float32) + pool_dist
        D[1:, 1:] = dists
        self.dists.append(torch.FloatTensor(D))
        return True
Exemplo n.º 8
0
def MolToMol2Block(mol, confId=-1, addHs=True, addCharges=True):
    """Returns a Mol2 string block for a molecule
      ARGUMENTS:

        - mol: the molecule
        - confId: (optional) selects which conformation to output (-1 = default)
                  if set to None will return all conformers
        - addHs: (optional) add explicit hydrogens before writing; hydrogen
                 coordinates are requested only when the molecule has a 3D
                 conformer, with a fallback to no coordinates if that fails
        - addCharges: (optional) compute Gasteiger charges before writing;
                      atoms without a '_GasteigerCharge' property are written
                      with a charge of 0.0

      RETURNS:

        a string (one Mol2 block per selected conformer, concatenated;
        empty when confId is None and the molecule has no conformers)
    """

    #
    # References
    # - Format specs http://www.tripos.com/data/support/mol2.pdf
    # - Atom typing http://www.sdsc.edu/CCMS/Packages/cambridge/pluto/atom_types.html
    #

    if confId is None:
        confIds = list(range(mol.GetNumConformers()))
    else:
        confIds = (confId, )

    # add explicit hydrogens (since mol2 reader requires them)
    if addHs:
        h_coords = mol.GetNumConformers() > 0 and mol.GetConformer(-1).Is3D()
        try:
            mol = AddHs(mol, addCoords=h_coords)
        except RuntimeError:
            mol = AddHs(mol, addCoords=False)

    # compute charges
    if addCharges:
        ComputeGasteigerCharges(mol)

    if not confIds:
        return ""

    # Everything below except the coordinates depends only on the molecule,
    # its atoms and its bonds, so it is built once rather than per conformer.

    # FIXME "USER_CHARGES" could become 'Gasteiger charges'
    # FIXME "SMALL" means small molecule but could become "PROTEIN"
    molecule = "@<TRIPOS>MOLECULE\n{}\n{} {} 0 0 0\nSMALL\nGASTEIGER\n\n".format(
        mol.GetProp("_Name") if mol.HasProp("_Name") else "UNK",
        mol.GetNumAtoms(), mol.GetNumBonds())

    # (atom index, element symbol, SYBYL type, charge) per atom. The charge
    # is read as text and ',' is replaced by '.' before parsing, to cope with
    # a decimal comma in the stored property.
    atom_records = [
        (a.GetIdx(), a.GetSymbol(), _sybyl_atom_type(a),
         float(a.GetProp('_GasteigerCharge').replace(',', '.'))
         if a.HasProp('_GasteigerCharge') else 0.0)
        for a in mol.GetAtoms()
    ]
    atom_format = "{:>4} {:>4} {:>13.4f} {:>9.4f} {:>9.4f} {:<5} {} {} {:>7.4f}"

    # Bond type: "ar" for aromatic (order 1.5), "am" for amide bonds,
    # otherwise the integer bond order.
    bond_lines = [
        "{:>5} {:>5} {:>5} {:>2}".format(
            bid + 1,
            b.GetBeginAtomIdx() + 1,
            b.GetEndAtomIdx() + 1,
            "ar" if b.GetBondTypeAsDouble() == 1.5 else
            "am" if _amide_bond(b) else str(int(b.GetBondTypeAsDouble())))
        for bid, b in enumerate(mol.GetBonds())
    ]
    bond_block = "\n".join(["@<TRIPOS>BOND"] + bond_lines + ["\n"])

    blocks = []
    for conf in confIds:
        pos = _get_positions(mol, conf)
        atom_lines = ["@<TRIPOS>ATOM"]
        atom_lines.extend(
            atom_format.format(idx + 1, symbol, float(pos[idx][0]),
                               float(pos[idx][1]), float(pos[idx][2]),
                               sybyl_type, 1, "UNL", charge)
            for idx, symbol, sybyl_type, charge in atom_records)
        atom_block = "\n".join(atom_lines) + "\n"

        blocks.append(molecule + atom_block + bond_block)
    return "".join(blocks)