Пример #1
0
 def test_hash_ecfp_pair(self):
     """Check that hash_ecfp_pair returns an integer in [0, 2**power).

     For each of the powers 2, 16 and 64, ten pairs of random 10-character
     strings are hashed and the type and range of the result are verified.
     """
     for power in (2, 16, 64):
         upper_bound = 2**power
         for _ in range(10):
             first = random_string(10)
             second = random_string(10)
             hashed = rgf.hash_ecfp_pair((first, second), power)
             # Type first, then the exclusive upper and inclusive lower bound.
             self.assertIsInstance(hashed, integer_types)
             self.assertLess(hashed, upper_bound)
             self.assertGreaterEqual(hashed, 0)
Пример #2
0
 def test_hash_ecfp_pair(self):
   """Verify the type and range of hash_ecfp_pair for several powers.

   Every hash of a random string pair must be an integer that is
   non-negative and strictly below 2**power.
   """
   trials_per_power = 10
   for power in (2, 16, 64):
     limit = 2**power
     for _ in range(trials_per_power):
       pair = (random_string(10), random_string(10))
       result = rgf.hash_ecfp_pair(pair, power)
       self.assertIsInstance(result, integer_types)
       self.assertLess(result, limit)
       self.assertGreaterEqual(result, 0)
Пример #3
0
def getECFPstringsRadiusN_avg_ifp(mols_info,
                                  heavy_atoms=0,
                                  base_prop=['AtomicMass'],
                                  sa_dicts=[{}, {}],
                                  contacts=[[], []],
                                  ifptype='splif',
                                  degrees=[1, 1],
                                  parameters=[{
                                      'weighted': 0,
                                      'alpha': -1,
                                      'alpha_step': 0.1
                                  }, {
                                      'weighted': 0,
                                      'alpha': -1,
                                      'alpha_step': 0.1
                                  }],
                                  hash_type='str',
                                  idf_power=64):
    """Hash pairs of molecular fragments around contacting atoms (splif or plec procedure).

    For every contacting atom pair, the bond environment of each atom is
    collected outward to the requested radius, a property list is computed for
    the atoms of each environment with get_atom_proplist, and the pair of
    property lists is hashed to a single identifier.

    Parameters:
        mols_info - a list of two tuples each describing a molecule (coordinates, rdkit.Chem.rdchem.Mol molecule, weights)
                    it represents a pair of contacting molecules (e.g. protein and ligand)
        heavy_atoms, base_prop, hash_type and idf_power same as in getECFPstringsRadiusN_avg_ecfp
                    Here: when heavy_atoms is truthy, atom pairs involving a hydrogen
                    (atomic number 1) are skipped and environments are built with
                    useHs=False; hash_type must be 'str' or 'vec'; idf_power is
                    passed as `power` to hash_ecfp_pair and is only used for 'str'.
        sa_dicts - a list of two dictionaries each mapping atom indices to their solid angles in a molecule
                   Only consulted when some entry of base_prop contains 'SolidAngle';
                   an empty dictionary triggers construction of a concave_hull_3D
                   object for that molecule instead.
        contacts - a list of two index lists each indicating the queried atoms in a molecule
                   The two lists are paired element-wise (zip).
        ifptype - either 'splif' or 'plec'
                  'splif' uses the single radius pair (degrees[0], degrees[1]);
                  'plec' uses the radius pairs returned by plec_pairing(plec_degrees=degrees).
        degrees - ecfp radii
        parameters - a list of parameter dictionaries, each for calculating the solid angles of surface atoms of a molecule
                     Only the 'alpha' and 'alpha_step' keys are read in this function.

    Returns a dictionary mapping ((atom index in molecule 0, atom index in
    molecule 1), 'r<radius0>-r<radius1>') to the hashed identifier of that
    fragment pair (hash_ecfp_pair for hash_type 'str', built-in hash for 'vec').
    An empty dictionary is returned when contacts[0] is empty or ifptype is not
    recognised; on an unrecognised hash_type the dictionary built so far is
    returned. In all three cases a message is printed instead of raising.

    NOTE(review): several defaults are mutable objects shared between calls.
    This function does not modify them in place, but entries of sa_dicts are
    passed on to get_atom_proplist - confirm that helper does not mutate them.
    """
    ecfp_dict = {}
    # The RDKit molecule is the second element of each mols_info tuple.
    mols = [mols_info[0][1], mols_info[1][1]]
    nPairs = len(contacts[0])
    if nPairs == 0:
        # Nothing to pair up: report and return the empty result.
        print('Wrong contact list!')
        return ecfp_dict
    else:
        # Environment pairs already emitted in the radius rounds processed so far.
        neighborhoods = []
        # Atom-index pairs that must not be expanded any further (dict used as a set).
        deadAtomPairs = {}
        sa_lists = [{}, {}]
        # Solid-angle data is prepared only if a requested property needs it.
        if len([p for p in base_prop if 'SolidAngle' in p]) > 0:
            for i in [0, 1]:
                if len(sa_dicts[i]) == 0:
                    # No precomputed solid angles: build the concave hull from
                    # the molecule's coordinates and weights.
                    tmp = concave_hull_3D(
                        points=mols_info[i][0],
                        weights=mols_info[i][2],
                        alpha=parameters[i]['alpha'],
                        alpha_step=parameters[i]['alpha_step'])
                    tmp.construct_conchull()
                    # NOTE(review): the hull object itself (not a dict) is stored
                    # and later passed as sa_dict - presumably get_atom_proplist
                    # accepts both; verify.
                    sa_lists[i] = tmp
                else:
                    sa_lists[i] = sa_dicts[i]

        # Decide which (radius in molecule 0, radius in molecule 1) pairs to process.
        if ifptype == 'splif':
            dg_pairs = [(degrees[0], degrees[1])]
        elif ifptype == 'plec':
            dg_pairs = plec_pairing(plec_degrees=degrees)
        else:
            print('Wrong ifp type!')
            return ecfp_dict

        for dgs in dg_pairs:
            # Candidates collected for this radius pair; hashed after the loop
            # so that duplicates can be resolved in sorted order.
            neighborhoodThisRound = []
            for (a1, a2) in zip(contacts[0], contacts[1]):
                inds = (int(a1), int(a2))
                if inds not in deadAtomPairs:
                    atoms = (mols[0].GetAtomWithIdx(inds[0]),
                             mols[1].GetAtomWithIdx(inds[1]))
                    # sign1: heavy-atom mode and at least one atom is a hydrogen.
                    sign1 = (heavy_atoms and (atoms[0].GetAtomicNum() == 1
                                              or atoms[1].GetAtomicNum() == 1))
                    # sign2: at least one atom has degree 0.
                    sign2 = (atoms[0].GetDegree() == 0
                             or atoms[1].GetDegree() == 0)
                    if sign1 or sign2:
                        deadAtomPairs[inds] = 1
                        continue
                    # One (environment, property list, centre atom index) tuple
                    # per molecule.
                    nbhd_pairs = []
                    for k in [0, 1]:
                        # Bond indices of the environment around the centre atom.
                        env = list(
                            Chem.FindAtomEnvironmentOfRadiusN(
                                mols[k],
                                dgs[k],
                                inds[k],
                                useHs=not heavy_atoms))
                        env.sort()
                        # Atom indices touched by those bonds (both bond ends).
                        tmp_aids = set([
                            mols[k].GetBondWithIdx(bid).GetBeginAtomIdx()
                            for bid in env
                        ] + [
                            mols[k].GetBondWithIdx(bid).GetEndAtomIdx()
                            for bid in env
                        ])
                        # An empty environment falls back to the centre atom alone.
                        env_aids = set([inds[k]
                                        ]) if len(tmp_aids) == 0 else tmp_aids
                        tmpprop = get_atom_proplist(mol=mols[k],
                                                    sa_dict=sa_lists[k],
                                                    aids=env_aids,
                                                    base_prop=base_prop,
                                                    hash_type=hash_type)
                        # Disabled: appending the fragment SMILES to the property list.
                        #                        submol = Chem.PathToSubmol(mols[k], env)
                        #                        tmp_smile = Chem.MolToSmiles(submol)
                        #                        smile = atoms[k].GetSymbol() if tmp_smile == '' else tmp_smile
                        #                        tmpprop += [smile]
                        nbhd_pairs.append((env, tmpprop, inds[k]))
                    # NOTE(review): this compares against a tuple, so it only
                    # matches if dg_pairs holds tuples - confirm for plec_pairing.
                    if dgs == (0, 0):
                        # Radius 0 on both sides: hash immediately, without
                        # any environment de-duplication.
                        if hash_type == 'str':
                            tobehashed = (','.join(nbhd_pairs[0][1]),
                                          ','.join(nbhd_pairs[1][1]))
                            idf = hash_ecfp_pair(ecfp_pair=tobehashed,
                                                 power=idf_power)
                        elif hash_type == 'vec':
                            tobehashed = (tuple(nbhd_pairs[0][1]),
                                          tuple(nbhd_pairs[1][1]))
                            idf = hash(tobehashed)
                        else:
                            print('Wrong hash type!!!')
                            return ecfp_dict
                        ecfp_dict[(inds, 'r0-r0')] = idf
                    else:
                        neighborhoodThisRound.append(nbhd_pairs)
                        # The same pair of bond environments was already emitted
                        # in an earlier round: stop expanding this atom pair.
                        # NOTE(review): list membership is a linear scan.
                        if (nbhd_pairs[0][0],
                                nbhd_pairs[1][0]) in neighborhoods:
                            deadAtomPairs[inds] = 1
            if dgs != (0, 0):
                # Sorting fixes which candidate is emitted when several share
                # the same pair of environments: the first in sorted order.
                neighborhoodThisRound.sort()
                for candidate in neighborhoodThisRound:
                    envs = (candidate[0][0], candidate[1][0])
                    cand_inds = (candidate[0][2], candidate[1][2])
                    if envs not in neighborhoods:
                        neighborhoods.append(envs)
                        if hash_type == 'str':
                            tobehashed = (','.join(candidate[0][1]),
                                          ','.join(candidate[1][1]))
                            idf = hash_ecfp_pair(ecfp_pair=tobehashed,
                                                 power=idf_power)
                        elif hash_type == 'vec':
                            tobehashed = (tuple(candidate[0][1]),
                                          tuple(candidate[1][1]))
                            idf = hash(tobehashed)
                        else:
                            print('Wrong hash type!!!')
                            return ecfp_dict
                        ecfp_dict[(cand_inds, 'r' + str(dgs[0]) + '-r' +
                                   str(dgs[1]))] = idf
                    else:
                        # Duplicate environment pair: emit nothing and retire
                        # this atom pair for the remaining rounds.
                        deadAtomPairs[cand_inds] = 1
        return ecfp_dict