Пример #1
0
def SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5):
    """Compute the structural protein-ligand interaction fingerprint (SPLIF).

    Based on http://pubs.acs.org/doi/abs/10.1021/ci500319f.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules which are analysed in order to find interactions.
    depth : int (default = 1)
        The depth of the fingerprint, i.e. the number of bonds in Morgan
        algorithm. Note: For ECFP2: depth = 1, ECFP4: depth = 2, etc.
    size : int (default = 4096)
        SPLIF hashes are folded to the given size.
    distance_cutoff : float (default = 4.5)
        Cutoff distance for close contacts.

    Returns
    -------
    SPLIF : numpy array
        Structured array with one record per close contact, returned sorted
        (np.sort). Every record has three fields:
            'hash' -> folded hash of the (ligand, protein) atom pair
            'ligand_coords', shape (5, 3) -> the ligand atom's coordinates
                followed by its 'neighbors' entry
            'protein_coords', shape (5, 3) -> the protein atom's coordinates
                followed by its 'neighbors' entry

    """
    # Hydrogens are dropped before the contact search.
    heavy_protein = protein.atom_dict[protein.atom_dict['atomicnum'] != 1]
    heavy_ligand = ligand.atom_dict[ligand.atom_dict['atomicnum'] != 1]

    protein_atoms, ligand_atoms = close_contacts(heavy_protein,
                                                 heavy_ligand,
                                                 cutoff=distance_cutoff)

    record_dtype = [('hash', int),
                    ('ligand_coords', np.float32, (5, 3)),
                    ('protein_coords', np.float32, (5, 3))]
    splif = np.zeros((len(ligand_atoms)), dtype=record_dtype)

    contact_pairs = zip(ligand_atoms, protein_atoms)
    for i, (ligand_atom, protein_atom) in enumerate(contact_pairs):
        both_heavy = (ligand_atom['atomicnum'] != 1 and
                      protein_atom['atomicnum'] != 1)
        if not both_heavy:
            # The record for a pair involving hydrogen stays zero-filled.
            continue

        ligand_hash = _ECFP_atom_hash(ligand,
                                      int(ligand_atom['id']),
                                      depth=depth)[-1]
        protein_hash = _ECFP_atom_hash(protein,
                                       int(protein_atom['id']),
                                       depth=depth)[-1]
        # Sorting the two hashes makes the result independent of argument
        # order, i.e. SPLIF(protein, ligand) hashes the same way.
        pair_hash = hash32(tuple(sorted((ligand_hash, protein_hash))))

        ligand_xyz = np.vstack((ligand_atom['coords'].reshape((1, 3)),
                                ligand_atom['neighbors']))
        protein_xyz = np.vstack((protein_atom['coords'].reshape((1, 3)),
                                 protein_atom['neighbors']))
        splif[i] = (pair_hash, ligand_xyz, protein_xyz)

    # Fold the raw hashes down to the requested fingerprint size.
    splif['hash'] = fold(splif['hash'], size)
    return np.sort(splif)
Пример #2
0
def test_close_contacts():
    """Check the number of heavy-atom close contacts (cutoff=3) per ligand."""
    expected_counts = [5, 7, 6, 5, 3, 6, 5, 6, 6, 6, 5, 4, 7, 6, 6, 6, 7, 5,
                       6, 5, 5, 7, 4, 5, 6, 7, 6, 5, 7, 5, 6, 4, 5, 4, 3, 7,
                       6, 6, 3, 5, 4, 3, 1, 7, 3, 2, 4, 1, 2, 7, 4, 4, 6, 4,
                       6, 7, 7, 6, 6, 6, 5, 6, 5, 4, 4, 7, 3, 6, 6, 4, 7, 7,
                       4, 5, 4, 7, 3, 6, 6, 6, 5, 6, 4, 5, 4, 4, 6, 5, 5, 7,
                       6, 2, 6, 5, 1, 8, 6, 5, 7, 4]

    observed_counts = []
    for mol in mols:
        # Hydrogens are excluded on both sides before searching contacts.
        receptor_heavy = rec.atom_dict[rec.atom_dict['atomicnum'] != 1]
        ligand_heavy = mol.atom_dict[mol.atom_dict['atomicnum'] != 1]
        contacts = close_contacts(receptor_heavy, ligand_heavy, cutoff=3)
        # Only the first returned array is counted.
        observed_counts.append(len(contacts[0]))

    assert_array_equal(observed_counts, expected_counts)
Пример #3
0
def test_close_contacts():
    """Verify per-ligand close-contact counts between heavy atoms (cutoff=3)."""
    reference = [
        5, 7, 6, 5, 3, 6, 5, 6, 6, 6, 5, 4, 7, 6, 6, 6, 7, 5, 6, 5, 5, 7, 4, 5,
        6, 7, 6, 5, 7, 5, 6, 4, 5, 4, 3, 7, 6, 6, 3, 5, 4, 3, 1, 7, 3, 2, 4, 1,
        2, 7, 4, 4, 6, 4, 6, 7, 7, 6, 6, 6, 5, 6, 5, 4, 4, 7, 3, 6, 6, 4, 7, 7,
        4, 5, 4, 7, 3, 6, 6, 6, 5, 6, 4, 5, 4, 4, 6, 5, 5, 7, 6, 2, 6, 5, 1, 8,
        6, 5, 7, 4
    ]

    cc = []
    for mol in mols:
        # Mask out hydrogens (atomicnum == 1) for receptor and ligand alike.
        rec_atoms = rec.atom_dict[rec.atom_dict['atomicnum'] != 1]
        mol_atoms = mol.atom_dict[mol.atom_dict['atomicnum'] != 1]
        first_side = close_contacts(rec_atoms, mol_atoms, cutoff=3)[0]
        cc.append(len(first_side))

    assert_array_equal(cc, reference)
Пример #4
0
    def build(self, ligands, protein=None):
        """Descriptor building method.

        Parameters
        ----------
            ligands: array-like
                An array or generator of oddt.toolkit.Molecule objects for
                which the descriptor is computed.

            protein: oddt.toolkit.Molecule object (default=None)
                Protein object to be used while generating descriptors.
                If none, then the default protein (from constructor) is used.
                Otherwise, protein becomes new global and default protein.

        Returns
        -------
            descs: numpy array, shape=[n_samples, 351]
                An array of binana descriptors, aligned with input ligands.
                An empty float array of shape (0, 0) is returned when
                `ligands` yields no molecules.
        """
        if protein:
            self.set_protein(protein)
        else:
            protein = self.protein
        protein_dict = protein.atom_dict

        # The receptor side of the electrostatic term does not depend on the
        # ligand, so it is prepared once instead of once per molecule.
        ele_rec_types, ele_lig_types = zip(*self.ele_types)
        ele_rec_atoms = atoms_by_type(protein_dict, ele_rec_types,
                                      'atom_types_ad4')

        # One float row per ligand; stacked once at the end instead of
        # re-allocating the whole matrix on every iteration.
        rows = []
        for mol in ligands:
            mol_dict = mol.atom_dict
            vec = tuple()

            # Vina
            # TODO: Asynchronous output from vina, push command to score and
            # retrieve at the end?
            # TODO: Check if ligand has vina scores
            vec += tuple(self.vina.build(mol).flatten())

            # Close Contacts (<4A)
            vec += tuple(self.cc_4.build(mol).flatten())

            # Electrostatics (<4A)
            ele_mol_atoms = atoms_by_type(mol_dict, ele_lig_types,
                                          'atom_types_ad4')
            ele = tuple()
            for r_t, m_t in self.ele_types:
                mol_ele_dict, rec_ele_dict = close_contacts(
                    ele_mol_atoms[m_t], ele_rec_atoms[r_t], 4)
                if len(mol_ele_dict) and len(rec_ele_dict):
                    # NOTE(review): np.sqrt is applied element-wise before
                    # the sum, so this is the sum of absolute coordinate
                    # differences rather than the Euclidean distance. Left
                    # unchanged because it alters descriptor values --
                    # confirm the intent before touching it.
                    separation = np.sqrt(
                        (mol_ele_dict['coords'] -
                         rec_ele_dict['coords'])**2).sum(axis=-1)
                    energy = (mol_ele_dict['charge'] *
                              rec_ele_dict['charge'] / separation *
                              138.94238460104697e4).sum()  # convert to J/mol
                    ele += (energy,)
                else:
                    ele += (0,)
            vec += tuple(np.nan_to_num(ele))

            # Ligand Atom Types
            atoms = atoms_by_type(mol_dict, self.ligand_atom_types,
                                  'atom_types_ad4')
            vec += tuple([len(atoms[t]) for t in self.ligand_atom_types])

            # Close Contacts (<2.5A)
            vec += tuple(self.cc_25.build(mol).flatten())

            # H-Bonds (<4A)
            hbond_mol, hbond_rec, strict = hbonds(mol, protein, 4)
            # Retain only strict hbonds
            hbond_mol = hbond_mol[strict]
            hbond_rec = hbond_rec[strict]
            backbone = hbond_rec['isbackbone']
            alpha = hbond_rec['isalpha']
            beta = hbond_rec['isbeta']
            other = ~alpha & ~beta
            donor_mol = hbond_mol['isdonor']
            donor_rec = hbond_rec['isdonor']
            hbond_vec = ((donor_mol & backbone & alpha).sum(),
                         (donor_mol & backbone & beta).sum(),
                         (donor_mol & backbone & other).sum(),
                         (donor_mol & ~backbone & alpha).sum(),
                         (donor_mol & ~backbone & beta).sum(),
                         (donor_mol & ~backbone & other).sum(),
                         (donor_rec & backbone & alpha).sum(),
                         (donor_rec & backbone & beta).sum(),
                         (donor_rec & backbone & other).sum(),
                         (donor_rec & ~backbone & alpha).sum(),
                         (donor_rec & ~backbone & beta).sum(),
                         (donor_rec & ~backbone & other).sum())
            vec += tuple(hbond_vec)

            # Hydrophobic contacts (<4A)
            hydrophobic = hydrophobic_contacts(mol, protein, 4)[1]
            backbone = hydrophobic['isbackbone']
            alpha = hydrophobic['isalpha']
            beta = hydrophobic['isbeta']
            other = ~alpha & ~beta
            hyd_vec = ((backbone & alpha).sum(),
                       (backbone & beta).sum(),
                       (backbone & other).sum(),
                       (~backbone & alpha).sum(),
                       (~backbone & beta).sum(),
                       (~backbone & other).sum(),
                       len(hydrophobic))
            vec += tuple(hyd_vec)

            # Pi-stacking (<7.5A)
            _, pi_rec, pi_paralel, pi_tshaped = pi_stacking(mol, protein, 7.5)
            alpha = pi_rec['isalpha'] & pi_paralel
            beta = pi_rec['isbeta'] & pi_paralel
            other = ~alpha & ~beta & pi_paralel
            pi_vec = (alpha.sum(), beta.sum(), other.sum())
            vec += tuple(pi_vec)

            # T-shaped Pi-Pi interaction; computed here while pi_rec still
            # refers to the pi-stacking result, appended after pi-cation.
            alpha = pi_rec['isalpha'] & pi_tshaped
            beta = pi_rec['isbeta'] & pi_tshaped
            other = ~alpha & ~beta & pi_tshaped
            pi_t_vec = (alpha.sum(), beta.sum(), other.sum())

            # Pi-cation (<6A): receptor ring with ligand cation ...
            pi_rec, _, strict = pi_cation(protein, mol, 6)
            alpha = pi_rec['isalpha'] & strict
            beta = pi_rec['isbeta'] & strict
            other = ~alpha & ~beta & strict
            pi_cat_vec = (alpha.sum(), beta.sum(), other.sum())

            # ... and ligand ring with receptor cation.
            _, cat_rec, strict = pi_cation(mol, protein, 6)
            alpha = cat_rec['isalpha'] & strict
            beta = cat_rec['isbeta'] & strict
            other = ~alpha & ~beta & strict
            pi_cat_vec += (alpha.sum(), beta.sum(), other.sum())

            vec += tuple(pi_cat_vec)

            # T-shape (perpendicular Pi's) (<7.5A)
            vec += tuple(pi_t_vec)

            # Active site flexibility (<4A), heavy atoms only
            active_site = close_contacts(
                mol_dict[mol_dict['atomicnum'] != 1],
                protein_dict[protein_dict['atomicnum'] != 1],
                cutoff=4)[1]
            backbone = active_site['isbackbone']
            alpha = active_site['isalpha']
            beta = active_site['isbeta']
            other = ~alpha & ~beta
            as_flex = ((backbone & alpha).sum(),
                       (backbone & beta).sum(),
                       (backbone & other).sum(),
                       (~backbone & alpha).sum(),
                       (~backbone & beta).sum(),
                       (~backbone & other).sum(),
                       len(active_site))
            vec += tuple(as_flex)

            # Salt bridges (<5.5)
            salt_bridge_dict = salt_bridges(mol, protein, 5.5)[1]
            vec += (salt_bridge_dict['isalpha'].sum(),
                    salt_bridge_dict['isbeta'].sum(),
                    (~salt_bridge_dict['isalpha']
                     & ~salt_bridge_dict['isbeta']).sum(),
                    len(salt_bridge_dict))

            # Rotatable bonds
            vec += (mol.num_rotors,)

            rows.append(np.array(vec, dtype=float))

        if not rows:
            # No ligands: return an empty matrix instead of failing.
            return np.zeros((0, 0), dtype=float)
        return np.vstack(rows)
Пример #5
0
def PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5,
         size=16384, count_bits=True, sparse=True, ignore_hoh=True, bits_info=None):
    """Protein ligand extended connectivity fingerprint (PLEC).

    For every pair of atoms in contact, the ECFP hashes of both atoms are
    computed and hashed together layer by layer (corresponding depths).

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules which are analysed in order to find interactions.

    depth_ligand, depth_protein : int (default = (2, 4))
        The depth of the fingerprint, i.e. the number of bonds in Morgan
        algorithm. Note: For ECFP2: depth = 1, ECFP4: depth = 2, etc.

    size : int (default = 16384)
        PLEC is folded to the given size.

    distance_cutoff : float (default = 4.5)
        Cutoff distance for close contacts.

    sparse : bool (default = True)
        Should fingerprints be dense (contain all bits) or sparse (just the
        on bits).

    count_bits : bool (default = True)
        Should the bits be counted or unique. In dense representation it
        translates to integer array (count_bits=True) or boolean array if
        False.

    ignore_hoh : bool (default = True)
        Should the water molecules be ignored. This is based on the name of
        the residue ('HOH').

    bits_info : dict or None (default = None)
        If a dictionary is provided it is filled with information about bit
        contents: for every folded bit, a set of records holding the root
        atom index and depth for both ligand and protein. The dictionary is
        modified in-place.

    Returns
    -------
    PLEC : numpy array
        Sorted, folded hashes: one per atom pair in contact and per ECFP
        layer. Passed through `sparse_to_dense` when `sparse` is False.

    """
    hashed_pairs = []
    pair_records = []
    record_bits = bits_info is not None

    # Hydrogens are excluded; waters are excluded too when requested. The
    # `&` builds a new mask, so the heavy-atom mask itself is left intact.
    heavy_protein = (protein.atom_dict['atomicnum'] != 1)
    if ignore_hoh:
        contact_mask = heavy_protein & (protein.atom_dict['resname'] != 'HOH')
    else:
        contact_mask = heavy_protein
    protein_dict = protein.atom_dict[contact_mask]
    ligand_dict = ligand.atom_dict[ligand.atom_dict['atomicnum'] != 1]

    # Atoms in contact
    protein_atoms, ligand_atoms = close_contacts(
        protein_dict, ligand_dict, cutoff=distance_cutoff)

    lig_atom_repr = {atom_id: _ECFP_atom_repr(ligand, atom_id)
                     for atom_id in ligand_dict['id'].tolist()}
    # Built over every heavy protein atom, waters included
    # (HOH residues might be connected to metal atoms).
    prot_atom_repr = {
        atom_id: _ECFP_atom_repr(protein, atom_id)
        for atom_id in protein.atom_dict[heavy_protein]['id'].tolist()
    }

    contact_ids = zip(ligand_atoms['id'].tolist(),
                      protein_atoms['id'].tolist())
    for ligand_atom, protein_atom in contact_ids:
        ligand_ecfp = _ECFP_atom_hash(ligand,
                                      ligand_atom,
                                      depth=depth_ligand,
                                      atom_repr_dict=lig_atom_repr)
        protein_ecfp = _ECFP_atom_hash(protein,
                                       protein_atom,
                                       depth=depth_protein,
                                       atom_repr_dict=prot_atom_repr)
        assert len(ligand_ecfp) == depth_ligand + 1
        assert len(protein_ecfp) == depth_protein + 1

        # When the two ECFP lists differ in length, zip_longest pads the
        # shorter one with its last (depth, hash) entry.
        if depth_ligand < depth_protein:
            padding = (depth_ligand, ligand_ecfp[-1])
        else:
            padding = (depth_protein, protein_ecfp[-1])
        layers = zip_longest(enumerate(ligand_ecfp),
                             enumerate(protein_ecfp),
                             fillvalue=padding)
        for (ligand_depth, ligand_bit), (protein_depth, protein_bit) in layers:
            hashed_pairs.append(hash32((ligand_bit, protein_bit)))
            if record_bits:
                pair_records.append(PLEC_bit_info_record(
                    ligand_root_atom_idx=ligand_atom,
                    ligand_depth=ligand_depth,
                    protein_root_atom_idx=protein_atom,
                    protein_depth=protein_depth
                ))

    # Folding
    plec = fold(np.array(hashed_pairs), size=size)

    # Sorting; bit info is attached after folding so keys are folded bits.
    if bits_info is None:
        plec = np.sort(plec).astype(np.min_scalar_type(size))
    else:
        order = np.argsort(plec)
        plec = plec[order].astype(np.min_scalar_type(size))
        # `order` maps every sorted bit back to the record it came from.
        for bit_number, record_idx in zip(plec, order):
            bits_info.setdefault(bit_number, set()).add(
                pair_records[record_idx])

    # Collapse duplicates when bits are not counted.
    if not count_bits:
        plec = np.unique(plec)

    # Sparse or dense FP
    if not sparse:
        plec = sparse_to_dense(plec, size=size)
    return plec
Пример #6
0
def PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5,
         size=16384, count_bits=True, sparse=True, ignore_hoh=True):
    """Protein ligand extended connectivity fingerprint (PLEC).

    For every pair of atoms in contact, the ECFP hashes of both atoms are
    computed and hashed together layer by layer (corresponding depths).

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules which are analysed in order to find interactions.

    depth_ligand, depth_protein : int (default = (2, 4))
        The depth of the fingerprint, i.e. the number of bonds in Morgan
        algorithm. Note: For ECFP2: depth = 1, ECFP4: depth = 2, etc.

    size : int (default = 16384)
        PLEC is folded to the given size.

    distance_cutoff : float (default = 4.5)
        Cutoff distance for close contacts.

    sparse : bool (default = True)
        Should fingerprints be dense (contain all bits) or sparse (just the
        on bits).

    count_bits : bool (default = True)
        Should the bits be counted or unique. In dense representation it
        translates to integer array (count_bits=True) or boolean array if
        False.

    ignore_hoh : bool (default = True)
        Should the water molecules be ignored. This is based on the name of
        the residue ('HOH').

    Returns
    -------
    PLEC : numpy array
        Sorted, folded hashes: one per atom pair in contact and per ECFP
        layer. Passed through `sparse_to_dense` when `sparse` is False.

    """
    # Hydrogens are excluded; waters are excluded too when requested. The
    # `&` builds a new mask, so the heavy-atom mask itself is left intact.
    heavy_protein = (protein.atom_dict['atomicnum'] != 1)
    if ignore_hoh:
        contact_mask = heavy_protein & (protein.atom_dict['resname'] != 'HOH')
    else:
        contact_mask = heavy_protein
    protein_dict = protein.atom_dict[contact_mask]
    ligand_dict = ligand.atom_dict[ligand.atom_dict['atomicnum'] != 1]

    # Atoms in contact
    protein_atoms, ligand_atoms = close_contacts(
        protein_dict, ligand_dict, cutoff=distance_cutoff)

    lig_atom_repr = {atom_id: _ECFP_atom_repr(ligand, atom_id)
                     for atom_id in ligand_dict['id'].tolist()}
    # Built over every heavy protein atom, waters included
    # (HOH residues might be connected to metal atoms).
    prot_atom_repr = {
        atom_id: _ECFP_atom_repr(protein, atom_id)
        for atom_id in protein.atom_dict[heavy_protein]['id'].tolist()
    }

    hashed_pairs = []
    contact_ids = zip(ligand_atoms['id'].tolist(),
                      protein_atoms['id'].tolist())
    for ligand_atom, protein_atom in contact_ids:
        ligand_ecfp = _ECFP_atom_hash(ligand,
                                      ligand_atom,
                                      depth=depth_ligand,
                                      atom_repr_dict=lig_atom_repr)
        protein_ecfp = _ECFP_atom_hash(protein,
                                       protein_atom,
                                       depth=depth_protein,
                                       atom_repr_dict=prot_atom_repr)
        assert len(ligand_ecfp) == depth_ligand + 1
        assert len(protein_ecfp) == depth_protein + 1

        # When the two ECFP lists differ in length, zip_longest pads the
        # shorter one with its last hash.
        if depth_ligand < depth_protein:
            padding = ligand_ecfp[-1]
        else:
            padding = protein_ecfp[-1]
        hashed_pairs.extend(
            hash32(layer_pair)
            for layer_pair in zip_longest(ligand_ecfp, protein_ecfp,
                                          fillvalue=padding))

    # Folding and sorting
    folded = fold(np.array(hashed_pairs), size=size)
    plec = np.sort(folded)

    # Collapse duplicates when bits are not counted.
    if not count_bits:
        plec = np.unique(plec)

    # Sparse or dense FP
    if not sparse:
        plec = sparse_to_dense(plec, size=size)
    return plec
Пример #7
0
def SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5):
    """Compute the structural protein-ligand interaction fingerprint (SPLIF).

    Based on http://pubs.acs.org/doi/abs/10.1021/ci500319f.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules which are analysed in order to find interactions.
    depth : int (default = 1)
        The depth of the fingerprint, i.e. the number of bonds in Morgan
        algorithm. Note: For ECFP2: depth = 1, ECFP4: depth = 2, etc.
    size : int (default = 4096)
        SPLIF hashes are folded to the given size.
    distance_cutoff : float (default = 4.5)
        Cutoff distance for close contacts.

    Returns
    -------
    SPLIF : numpy array
        Structured array with one record per close contact, returned sorted
        (np.sort). Every record has three fields:
            'hash' -> folded hash of the (ligand, protein) atom pair
            'ligand_coords', shape (7, 3) -> the ligand atom's coordinates
                followed by its 'neighbors' entry
            'protein_coords', shape (7, 3) -> the protein atom's coordinates
                followed by its 'neighbors' entry

    """
    # Hydrogens are dropped before the contact search.
    protein_dict = protein.atom_dict[protein.atom_dict['atomicnum'] != 1]
    ligand_dict = ligand.atom_dict[ligand.atom_dict['atomicnum'] != 1]

    protein_atoms, ligand_atoms = close_contacts(
        protein_dict, ligand_dict, cutoff=distance_cutoff)

    record_dtype = [('hash', int),
                    ('ligand_coords', np.float32, (7, 3)),
                    ('protein_coords', np.float32, (7, 3))]
    splif = np.zeros((len(ligand_atoms)), dtype=record_dtype)

    # Atom representations are computed once and handed to every
    # _ECFP_atom_hash call below.
    lig_atom_repr = {atom_id: _ECFP_atom_repr(ligand, int(atom_id))
                     for atom_id in ligand_dict['id']}
    prot_atom_repr = {atom_id: _ECFP_atom_repr(protein, int(atom_id))
                      for atom_id in protein_dict['id']}

    contact_pairs = zip(ligand_atoms, protein_atoms)
    for i, (ligand_atom, protein_atom) in enumerate(contact_pairs):
        both_heavy = (ligand_atom['atomicnum'] != 1 and
                      protein_atom['atomicnum'] != 1)
        if not both_heavy:
            # The record for a pair involving hydrogen stays zero-filled.
            continue

        ligand_hash = _ECFP_atom_hash(ligand,
                                      int(ligand_atom['id']),
                                      depth=depth,
                                      atom_repr_dict=lig_atom_repr)[-1]
        protein_hash = _ECFP_atom_hash(protein,
                                       int(protein_atom['id']),
                                       depth=depth,
                                       atom_repr_dict=prot_atom_repr)[-1]
        # Sorting the two hashes makes the result independent of argument
        # order, i.e. SPLIF(protein, ligand) hashes the same way.
        pair_hash = hash32(tuple(sorted((ligand_hash, protein_hash))))

        ligand_xyz = np.vstack((ligand_atom['coords'].reshape((1, 3)),
                                ligand_atom['neighbors']))
        protein_xyz = np.vstack((protein_atom['coords'].reshape((1, 3)),
                                 protein_atom['neighbors']))
        splif[i] = (pair_hash, ligand_xyz, protein_xyz)

    # Fold the raw hashes down to the requested fingerprint size.
    splif['hash'] = fold(splif['hash'], size)
    return np.sort(splif)
Пример #8
0
 def build(self, ligands, protein=None):
     """Descriptor building method.

     Parameters
     ----------
         ligands: array-like
             An array or generator of oddt.toolkit.Molecule objects for
             which the descriptor is computed.

         protein: oddt.toolkit.Molecule object (default=None)
             Protein object to be used while generating descriptors.
             If none, then the default protein (from constructor) is used.
             Otherwise, protein becomes new global and default protein.

     Returns
     -------
         descs: numpy array, shape=[n_samples, 351]
             An array of binana descriptors, aligned with input ligands.
             An empty float array of shape (0, 0) is returned when
             `ligands` yields no molecules.
     """
     if protein:
         self.set_protein(protein)
     else:
         protein = self.protein
     protein_dict = protein.atom_dict

     # Constants and receptor-side data do not depend on the ligand, so
     # they are prepared once instead of once per molecule.
     vina_scores = ['vina_affinity', 'vina_gauss1', 'vina_gauss2',
                    'vina_repulsion', 'vina_hydrophobic', 'vina_hydrogen']
     ligand_atom_types = ['A', 'BR', 'C', 'CL', 'F', 'HD', 'I', 'N', 'NA',
                          'OA', 'P', 'S', 'SA']
     # (receptor type, ligand type) pairs of AutoDock4 atom types.
     ele_types = (
         ('A', 'A'), ('A', 'C'), ('A', 'CL'), ('A', 'F'), ('A', 'FE'),
         ('A', 'HD'), ('A', 'MG'), ('A', 'MN'), ('A', 'N'), ('A', 'NA'),
         ('A', 'OA'), ('A', 'SA'), ('A', 'ZN'), ('BR', 'C'), ('BR', 'HD'),
         ('BR', 'OA'), ('C', 'C'), ('C', 'CL'), ('C', 'F'), ('C', 'HD'),
         ('C', 'MG'), ('C', 'MN'), ('C', 'N'), ('C', 'NA'), ('C', 'OA'),
         ('C', 'SA'), ('C', 'ZN'), ('CL', 'FE'), ('CL', 'HD'), ('CL', 'MG'),
         ('CL', 'N'), ('CL', 'OA'), ('CL', 'ZN'), ('F', 'HD'), ('F', 'N'),
         ('F', 'OA'), ('F', 'SA'), ('F', 'ZN'), ('FE', 'HD'), ('FE', 'N'),
         ('FE', 'OA'), ('HD', 'HD'), ('HD', 'I'), ('HD', 'MG'), ('HD', 'MN'),
         ('HD', 'N'), ('HD', 'NA'), ('HD', 'OA'), ('HD', 'P'), ('HD', 'S'),
         ('HD', 'SA'), ('HD', 'ZN'), ('MG', 'NA'), ('MG', 'OA'), ('MN', 'N'),
         ('MN', 'OA'), ('N', 'N'), ('N', 'NA'), ('N', 'OA'), ('N', 'SA'),
         ('N', 'ZN'), ('NA', 'OA'), ('NA', 'SA'), ('NA', 'ZN'), ('OA', 'OA'),
         ('OA', 'SA'), ('OA', 'ZN'), ('S', 'ZN'), ('SA', 'ZN'), ('A', 'BR'),
         ('A', 'I'), ('A', 'P'), ('A', 'S'), ('BR', 'N'), ('BR', 'SA'),
         ('C', 'FE'), ('C', 'I'), ('C', 'P'), ('C', 'S'), ('CL', 'MN'),
         ('CL', 'NA'), ('CL', 'P'), ('CL', 'S'), ('CL', 'SA'), ('CU', 'HD'),
         ('CU', 'N'), ('FE', 'NA'), ('FE', 'SA'), ('I', 'N'), ('I', 'OA'),
         ('MG', 'N'), ('MG', 'P'), ('MG', 'S'), ('MG', 'SA'), ('MN', 'NA'),
         ('MN', 'P'), ('MN', 'S'), ('MN', 'SA'), ('N', 'P'), ('N', 'S'),
         ('NA', 'P'), ('NA', 'S'), ('OA', 'P'), ('OA', 'S'), ('P', 'S'),
         ('P', 'SA'), ('P', 'ZN'), ('S', 'SA'), ('SA', 'SA'),
     )
     ele_rec_types, ele_lig_types = zip(*ele_types)
     ele_rec_atoms = atoms_by_type(protein_dict, ele_rec_types,
                                   'atom_types_ad4')

     # One float row per ligand; stacked once at the end instead of
     # re-allocating the whole matrix on every iteration.
     rows = []
     for mol in ligands:
         mol_dict = mol.atom_dict
         vec = tuple()

         # Vina
         # TODO: Asynchronous output from vina, push command to score and
         # retrieve at the end?
         # TODO: Check if ligand has vina scores
         scored_mol = self.vina.score(mol, single=True)[0].data
         vec += tuple([scored_mol[key] for key in vina_scores])

         # Close Contacts (<4A)
         vec += tuple(self.cc_4.build(mol, single=True).flatten())

         # Electrostatics (<4A)
         ele_mol_atoms = atoms_by_type(mol_dict, ele_lig_types,
                                       'atom_types_ad4')
         ele = tuple()
         for r_t, m_t in ele_types:
             mol_ele_dict, rec_ele_dict = interactions.close_contacts(
                 ele_mol_atoms[m_t], ele_rec_atoms[r_t], 4)
             if len(mol_ele_dict) and len(rec_ele_dict):
                 # NOTE(review): np.sqrt is applied element-wise before
                 # the sum, so this is the sum of absolute coordinate
                 # differences rather than the Euclidean distance. Left
                 # unchanged because it alters descriptor values --
                 # confirm the intent before touching it.
                 separation = np.sqrt(
                     (mol_ele_dict['coords'] -
                      rec_ele_dict['coords'])**2).sum(axis=-1)
                 energy = (mol_ele_dict['charge'] *
                           rec_ele_dict['charge'] / separation *
                           138.94238460104697e4).sum()  # convert to J/mol
                 ele += (energy,)
             else:
                 ele += (0,)
         vec += tuple(ele)

         # Ligand Atom Types
         atoms = atoms_by_type(mol_dict, ligand_atom_types, 'atom_types_ad4')
         atoms_counts = [len(atoms[t]) for t in ligand_atom_types]
         vec += tuple(atoms_counts)

         # Close Contacts (<2.5A)
         vec += tuple(self.cc_25.build(mol, single=True).flatten())

         # H-Bonds (<4A)
         hbond_mol, hbond_rec, strict = interactions.hbond(mol, protein, 4)
         # Retain only strict hbonds
         hbond_mol = hbond_mol[strict]
         hbond_rec = hbond_rec[strict]
         backbone = hbond_rec['isbackbone']
         alpha = hbond_rec['isalpha']
         beta = hbond_rec['isbeta']
         other = ~alpha & ~beta
         donor_mol = hbond_mol['isdonor']
         donor_rec = hbond_rec['isdonor']
         hbond_vec = ((donor_mol & backbone & alpha).sum(),
                      (donor_mol & backbone & beta).sum(),
                      (donor_mol & backbone & other).sum(),
                      (donor_mol & ~backbone & alpha).sum(),
                      (donor_mol & ~backbone & beta).sum(),
                      (donor_mol & ~backbone & other).sum(),
                      (donor_rec & backbone & alpha).sum(),
                      (donor_rec & backbone & beta).sum(),
                      (donor_rec & backbone & other).sum(),
                      (donor_rec & ~backbone & alpha).sum(),
                      (donor_rec & ~backbone & beta).sum(),
                      (donor_rec & ~backbone & other).sum())
         vec += tuple(hbond_vec)

         # Hydrophobic contacts (<4A)
         hydrophobic = interactions.hydrophobic_contacts(mol, protein, 4)[1]
         backbone = hydrophobic['isbackbone']
         alpha = hydrophobic['isalpha']
         beta = hydrophobic['isbeta']
         other = ~alpha & ~beta
         hyd_vec = ((backbone & alpha).sum(),
                    (backbone & beta).sum(),
                    (backbone & other).sum(),
                    (~backbone & alpha).sum(),
                    (~backbone & beta).sum(),
                    (~backbone & other).sum(),
                    len(hydrophobic))
         vec += tuple(hyd_vec)

         # Pi-stacking (<7.5A)
         _, pi_rec, pi_paralel, pi_tshaped = interactions.pi_stacking(
             mol, protein, 7.5)
         alpha = pi_rec['isalpha'] & pi_paralel
         beta = pi_rec['isbeta'] & pi_paralel
         other = ~alpha & ~beta & pi_paralel
         pi_vec = (alpha.sum(), beta.sum(), other.sum())
         vec += tuple(pi_vec)

         # T-shaped Pi-Pi interaction; computed here while pi_rec still
         # refers to the pi-stacking result, appended after pi-cation.
         alpha = pi_rec['isalpha'] & pi_tshaped
         beta = pi_rec['isbeta'] & pi_tshaped
         other = ~alpha & ~beta & pi_tshaped
         pi_t_vec = (alpha.sum(), beta.sum(), other.sum())

         # Pi-cation (<6A): receptor ring with ligand cation ...
         pi_rec, _, strict = interactions.pi_cation(protein, mol, 6)
         alpha = pi_rec['isalpha'] & strict
         beta = pi_rec['isbeta'] & strict
         other = ~alpha & ~beta & strict
         pi_cat_vec = (alpha.sum(), beta.sum(), other.sum())

         # ... and ligand ring with receptor cation.
         _, cat_rec, strict = interactions.pi_cation(mol, protein, 6)
         alpha = cat_rec['isalpha'] & strict
         beta = cat_rec['isbeta'] & strict
         other = ~alpha & ~beta & strict
         pi_cat_vec += (alpha.sum(), beta.sum(), other.sum())

         vec += tuple(pi_cat_vec)

         # T-shape (perpendicular Pi's) (<7.5A)
         vec += tuple(pi_t_vec)

         # Active site flexibility (<4A)
         active_site = interactions.close_contacts(mol_dict, protein_dict,
                                                   4)[1]
         backbone = active_site['isbackbone']
         alpha = active_site['isalpha']
         beta = active_site['isbeta']
         other = ~alpha & ~beta
         as_flex = ((backbone & alpha).sum(),
                    (backbone & beta).sum(),
                    (backbone & other).sum(),
                    (~backbone & alpha).sum(),
                    (~backbone & beta).sum(),
                    (~backbone & other).sum(),
                    len(active_site))
         vec += tuple(as_flex)

         # Salt bridges (<5.5)
         salt_bridge_dict = interactions.salt_bridges(mol, protein, 5.5)[1]
         vec += (salt_bridge_dict['isalpha'].sum(),
                 salt_bridge_dict['isbeta'].sum(),
                 (~salt_bridge_dict['isalpha']
                  & ~salt_bridge_dict['isbeta']).sum(),
                 len(salt_bridge_dict))

         # Rotatable bonds
         vec += (mol.num_rotors,)

         rows.append(np.array(vec, dtype=float))

     if not rows:
         # No ligands: return an empty matrix instead of failing.
         return np.zeros((0, 0), dtype=float)
     return np.vstack(rows)
Пример #9
0
 def time_close_contacts(self):
     """Run close_contacts (cutoff=10) for every molecule against the protein."""
     for ligand in self.mols:
         # The result is discarded; only the call itself matters here.
         close_contacts(ligand.atom_dict,
                        self.protein.atom_dict,
                        cutoff=10.0)
Пример #10
0
def PLEC(ligand, protein, depth_ligand=2, depth_protein=4, distance_cutoff=4.5,
         size=16384, count_bits=True, sparse=True, ignore_hoh=True):
    """Protein ligand extended connectivity fingerprint (PLEC).

    For every pair of atoms in contact, the ECFP hashes of both atoms are
    computed and hashed together layer by layer (corresponding depths).

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules which are analysed in order to find interactions.
    depth_ligand, depth_protein : int (default = (2, 4))
        The depth of the fingerprint, i.e. the number of bonds in Morgan
        algorithm. Note: For ECFP2: depth = 1, ECFP4: depth = 2, etc.
    size : int (default = 16384)
        PLEC is folded to the given size.
    distance_cutoff : float (default = 4.5)
        Cutoff distance for close contacts.
    sparse : bool (default = True)
        Should fingerprints be dense (contain all bits) or sparse (just the
        on bits).
    count_bits : bool (default = True)
        Should the bits be counted or unique. In dense representation it
        translates to integer array (count_bits=True) or boolean array if
        False.
    ignore_hoh : bool (default = True)
        Should the water molecules be ignored. This is based on the name of
        the residue ('HOH').

    Returns
    -------
    PLEC : numpy array
        Sorted, folded hashes: one per atom pair in contact and per ECFP
        layer. Passed through `sparse_to_dense` when `sparse` is False.

    """
    # Hydrogens are excluded; waters are excluded too when requested.
    heavy_protein = (protein.atom_dict['atomicnum'] != 1)
    if ignore_hoh:
        contact_mask = heavy_protein & (protein.atom_dict['resname'] != 'HOH')
    else:
        contact_mask = heavy_protein
    protein_dict = protein.atom_dict[contact_mask]
    ligand_dict = ligand.atom_dict[ligand.atom_dict['atomicnum'] != 1]

    # Atoms in contact
    protein_atoms, ligand_atoms = close_contacts(
        protein_dict, ligand_dict, cutoff=distance_cutoff)

    hashed_pairs = []
    contact_ids = zip(ligand_atoms['id'], protein_atoms['id'])
    for ligand_id, protein_id in contact_ids:
        ligand_ecfp = _ECFP_atom_hash(ligand,
                                      int(ligand_id),
                                      depth=depth_ligand)
        protein_ecfp = _ECFP_atom_hash(protein,
                                       int(protein_id),
                                       depth=depth_protein)
        assert len(ligand_ecfp) == depth_ligand + 1
        assert len(protein_ecfp) == depth_protein + 1

        # When the two ECFP lists differ in length, zip_longest pads the
        # shorter one with its last hash.
        if depth_ligand < depth_protein:
            padding = ligand_ecfp[-1]
        else:
            padding = protein_ecfp[-1]
        hashed_pairs.extend(
            hash32(layer_pair)
            for layer_pair in zip_longest(ligand_ecfp, protein_ecfp,
                                          fillvalue=padding))

    # Folding and sorting
    folded = fold(np.array(hashed_pairs), size=size)
    plec = np.sort(folded)

    # Collapse duplicates when bits are not counted.
    if not count_bits:
        plec = np.unique(plec)

    # Sparse or dense FP
    if not sparse:
        plec = sparse_to_dense(plec, size=size)
    return plec
Пример #11
0
 def time_close_contacts(self):
     """Call close_contacts with cutoff=10 on each molecule and the protein."""
     for current_mol in self.mols:
         # Return value is intentionally ignored.
         close_contacts(
             current_mol.atom_dict,
             self.protein.atom_dict,
             cutoff=10.0,
         )