Пример #1
0
def CalculateProtrudeShapeDistance(RefMol, ProbeMol):
    """Return the protrude shape distance of two pre-aligned molecules as text.

    The probe molecule is passed first and the reference molecule second to
    ``rdShapeHelpers.ShapeProtrudeDist``. The numeric result is rendered as a
    string with two decimal places.
    """
    protrude = rdShapeHelpers.ShapeProtrudeDist(ProbeMol, RefMol)
    return "%.2f" % protrude
Пример #2
0
def get_SucosScore(ref_mol, query_mol, tani=False, ref_features=None, query_features=None, score_mode=FeatMaps.FeatMapScoreMode.All):
    """
    Compute the SuCOS score of a query molecule against a reference molecule.

    The score is the mean of a feature-map score and a shape term, each
    clipped to the range [0, 1]. Features can be pre-computed with
    getRawFeatures and passed in to avoid recalculating them on every call.

    :param ref_mol: The reference molecule
    :param query_mol: The molecule scored against the reference
    :param tani: If true the shape term is 1 minus the shape Tanimoto distance,
        otherwise it is 1 minus the shape protrude distance
    :param ref_features: Optional pre-computed features of the reference molecule
    :param query_features: Optional pre-computed features of the query molecule
    :param score_mode: Forwarded unchanged to get_FeatureMapScore
    :return: A tuple of 3 values: the SuCOS score, the clipped feature map score
        and the shape term that was used
    """
    # Missing (or empty) feature inputs are derived from the molecules.
    ref_features = ref_features or getRawFeatures(ref_mol)
    query_features = query_features or getRawFeatures(query_mol)

    feature_term = np.clip(
        get_FeatureMapScore(ref_features, query_features, tani, score_mode), 0, 1)

    if tani:
        # Tanimoto variant: convert the distance to a similarity, then clip.
        shape_term = np.clip(
            1 - float(rdShapeHelpers.ShapeTanimotoDist(ref_mol, query_mol)), 0, 1)
        return 0.5*feature_term + 0.5*shape_term, feature_term, shape_term

    # Protrude variant: clip the distance first, then convert to a similarity.
    clipped_dist = np.clip(
        rdShapeHelpers.ShapeProtrudeDist(ref_mol, query_mol, allowReordering=False),
        0, 1)
    shape_term = 1.0 - clipped_dist
    return 0.5 * feature_term + 0.5 * shape_term, feature_term, shape_term
Пример #3
0
    def sucos_mol_to_mol(self,
                         mol1,
                         mol2,
                         score_mode=FeatMaps.FeatMapScoreMode.All):
        '''
        Score one RDKit molecule against another with SuCOS.

        Explicit hydrogens are added to both molecules before scoring. The
        result is the mean of the clipped feature-map score and one minus the
        clipped shape protrude distance.

        :param mol1: rdkit mol object, used as the reference
        :param mol2: rdkit mol object, used as the probe
        :param score_mode: passed through to self.get_fm_score
        :return: the SuCOS score
        '''
        reference = Chem.AddHs(mol1)
        probe = Chem.AddHs(mol2)

        feature_term = np.clip(
            self.get_fm_score(reference, probe, score_mode), 0, 1)

        distance = np.clip(
            rdShapeHelpers.ShapeProtrudeDist(reference,
                                             probe,
                                             allowReordering=False), 0, 1)

        return 0.5 * feature_term + 0.5 * (1 - distance)
Пример #4
0
def calc_SC_RDKit_score(query_mol, ref_mol):
    """Return the SC_RDKit score of ``query_mol`` against ``ref_mol``.

    The score is the mean of the feature-map score and one minus the shape
    protrude distance. Neither term is clipped here.
    """
    feature_term = get_FeatureMapScore(query_mol, ref_mol)
    shape_distance = rdShapeHelpers.ShapeProtrudeDist(
        query_mol, ref_mol, allowReordering=False)

    return 0.5 * feature_term + 0.5 * (1 - shape_distance)
Пример #5
0
def get_SucosScore(ref_mol, query_mol, field_name):
    """Compute the SuCOS score and store it on the query molecule.

    The score is the mean of the feature-map score and one minus the shape
    protrude distance (no clipping is applied). It is written to
    ``query_mol`` as a double property named ``field_name`` and also returned.
    """
    feature_term = get_FeatureMapScore(ref_mol, query_mol)
    shape_distance = rdShapeHelpers.ShapeProtrudeDist(
        ref_mol, query_mol, allowReordering=False)

    sucos = 0.5 * feature_term + 0.5 * (1.0 - shape_distance)
    query_mol.SetDoubleProp(field_name, sucos)
    return sucos
Пример #6
0
def score(reflig, prb_mols, ids, score_mode=FeatMaps.FeatMapScoreMode.All, p=False):
    """Score selected conformers of a probe molecule against a reference.

    :param reflig: reference molecule; explicit hydrogens are added before scoring
    :param prb_mols: a single molecule whose conformers are scored
    :param ids: iterable of conformer ids of ``prb_mols`` to score
    :param score_mode: forwarded to get_FeatureMapScore
    :param p: if true, print the SuCOS score and shape Tanimoto distance
        for every conformer
    :return: dict mapping the position of each conformer id within ``ids``
        (as a string, starting at "0") to its SuCOS score
    """
    # NOTE(review): AddHs is called without addCoords=True here and below -
    # confirm the added hydrogens get the coordinates the shape terms expect.
    ref = Chem.AddHs(reflig)

    results_sucos = {}

    # Keys are the running position in `ids`, not the conformer id itself.
    for idx, conf_id in enumerate(ids):

        # Round-trip through a mol block to isolate one conformer as its own molecule.
        prb = Chem.AddHs(
            Chem.MolFromMolBlock(Chem.MolToMolBlock(prb_mols, confId=conf_id)))

        fm_score = get_FeatureMapScore(ref, prb, score_mode)
        fm_score = np.clip(fm_score, 0, 1)

        protrude_dist = rdShapeHelpers.ShapeProtrudeDist(ref, prb,
                                                         allowReordering=False)
        protrude_dist = np.clip(protrude_dist, 0, 1)

        SuCOS_score = 0.5 * fm_score + 0.5 * (1 - protrude_dist)
        results_sucos[str(idx)] = SuCOS_score

        if p:
            # The Tanimoto distance is only reported, never returned, so it is
            # computed only when it is actually printed.
            tanimoto_score = Chem.rdShapeHelpers.ShapeTanimotoDist(ref, prb)
            print("********************************")
            print("index: " + str(idx))
            print("SuCOS score:\t%f" % SuCOS_score)
            print("Tani score:\t%f" % tanimoto_score)
            print("********************************")

    return results_sucos
Пример #7
0
def get_sucos(frag_sdf_folder, docked_sdf_file,
              output_csv='sucos_scores/JC_sucos_scores.csv'):
    """Score every docked compound against every fragment and write a CSV.

    For each docked molecule the SuCOS score against each fragment is
    computed, the inspiration-fragment match is resolved with
    ``get_frag_match`` and the average score with ``get_avg_sucos``. One row
    per docked molecule is written to ``output_csv``.

    :param frag_sdf_folder: folder whose entries are each read as a
        fragment mol file
    :param docked_sdf_file: SD file with the docked molecules; each must carry
        the ``_Name`` and ``fragments`` properties
    :param output_csv: path of the CSV to write; its directory must already
        exist. Defaults to the previously hard-coded location.
    :return: None
    """
    # Skip entries RDKit cannot parse; a None fragment would crash scoring.
    frag_mol_list = []
    for sdf_file in os.listdir(frag_sdf_folder):
        frag_mol = Chem.MolFromMolFile(os.path.join(frag_sdf_folder, sdf_file),
                                       sanitize=True)
        if frag_mol is not None:
            frag_mol_list.append(frag_mol)

    docked_mol_list = [
        mol for mol in Chem.SDMolSupplier(docked_sdf_file, sanitize=True)
        if mol is not None
    ]

    all_frags_scores = []

    for docked_mol in docked_mol_list:
        docked_name = docked_mol.GetProp('_Name')
        print('Getting values for {}'.format(docked_name))

        sucos_scores = []
        frags_complete = []
        for frag_mol in frag_mol_list:
            fm_score = get_FeatureMapScore(frag_mol, docked_mol)
            fm_score = np.clip(fm_score, 0, 1)

            protrude_dist = rdShapeHelpers.ShapeProtrudeDist(
                frag_mol, docked_mol, allowReordering=False)
            protrude_dist = np.clip(protrude_dist, 0, 1)

            sucos_scores.append(0.5 * fm_score + 0.5 * (1 - protrude_dist))
            frags_complete.append(frag_mol.GetProp('_Name'))

        frag_scores = list(zip(frags_complete, sucos_scores))
        insp_frags = docked_mol.GetProp('fragments')
        found, frag, score = get_frag_match(insp_frags, frag_scores)

        # Look the selected fragment molecule up again by its name.
        frag_mol = frag_mol_list[frags_complete.index(frag)]

        frag_SMILES = Chem.MolToSmiles(frag_mol)
        docked_SMILES = Chem.MolToSmiles(docked_mol)

        avg_score = get_avg_sucos(sucos_scores)

        all_frags_scores.append((docked_name, frag, found, docked_SMILES,
                                 frag_SMILES, score, avg_score))

    # Write once after all compounds are scored instead of rewriting the
    # whole file on every iteration. newline='' is what the csv module
    # requires to avoid blank lines between rows on some platforms.
    with open(output_csv, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([
            'Compound_name', 'Fragment', 'Insp_frag_found', 'dockedSMILES',
            'fragSMILES', 'SuCOS_score', 'Avg_SuCOS_score'
        ])
        writer.writerows(all_frags_scores)
Пример #8
0
def _xcos_fragment_name(frag_mol, prefix='Mpro-'):
    """Return the fragment's ``_Name`` property without a leading ``prefix``."""
    name = frag_mol.GetProp('_Name')
    # str.strip('Mpro-') would strip any of the characters M, p, r, o, - from
    # BOTH ends of the name; only the literal prefix should be removed.
    if name.startswith(prefix):
        name = name[len(prefix):]
    return name


def _xcos_bit_vs_frag(bit, frag_mol, score_threshold):
    """Return ``(shape_score, feat_score)`` for one bit/fragment pair.

    Both values are 0 unless the pair shares a common substructure and both
    the shape score and the feature score exceed ``score_threshold``.
    """
    # Score only if some common structure is shared between bit and fragment.
    mcs_match = rdFMCS.FindMCS([bit, frag_mol],
                               ringMatchesRingOnly=True,
                               matchValences=True)
    mcs_mol = Chem.MolFromSmarts(mcs_match.smartsString)
    if not frag_mol.HasSubstructMatch(mcs_mol):
        return 0, 0

    # A reduced van der Waals radius scale gives a stricter overlay.
    protrude_dist = rdShapeHelpers.ShapeProtrudeDist(
        bit, frag_mol, allowReordering=False, vdwScale=0.2)
    protrude_score = 1 - np.clip(protrude_dist, 0, 1)

    # Small bits are compared against large fragments: a poor overlay scores
    # 0, and the more expensive feature map is computed only after the shape
    # test passes.
    if not protrude_score > score_threshold:
        return 0, 0

    fm_score = np.clip(getFeatureMapScore(bit, frag_mol), 0, 1)

    # A good shape overlay with a poor feature match (e.g. two carbons of a
    # three-membered ring over two aromatic carbons) is rejected as well.
    if not fm_score > score_threshold:
        return 0, 0

    return protrude_score, fm_score


def getReverseScores(mols, frags, score_threshold, writer):
    """Score each molecule's bits against the fragments and write the molecule.

    Each molecule is split into bits with ``getBits``. Every bit with more
    than one non-wildcard atom is scored against every fragment, and the
    best-scoring fragment rows per bit are kept. The modified SuCOS score of
    a row is::

        0.5 * Feat_score * (bit atoms without wildcards)
        + 0.5 * Shape_score * (bit atoms including wildcards)

    Three properties are set on each molecule before it is written:
    ``field_XCosRefMols`` (comma-joined unique fragment names with a score
    above 0), ``field_XCosNumHits`` (their count) and ``field_XCosScore1``
    (sum of the best modified scores, 4 decimal places).

    A molecule with no scorable bits gets an empty fragment list and a score
    of 0 rather than reusing the previous molecule's results.

    :param mols: iterable of molecules to score
    :param frags: fragment molecules; must be re-iterable (e.g. a list)
    :param score_threshold: minimum shape and feature score for a hit
    :param writer: object with ``write(mol)`` and ``flush()``
    """
    wildcard_query = Chem.MolFromSmarts('[#0]')
    columns = [
        'Fragment', 'Shape_score', 'no_bit_atoms', 'Feat_score',
        'no_bit_atoms_without_wild_card'
    ]

    for mol in mols:

        best_rows_per_bit = []

        for bit in getBits(mol):

            # Removing wildcard atoms does not affect the feature score but
            # lowers the shape overlay, so the wildcard-free atom count only
            # scales the feature score while the full bit is used for shape.
            bit_without_wildcard_atoms = Chem.DeleteSubstructs(
                bit, wildcard_query)
            no_bit_atoms_without_wild_card = (
                bit_without_wildcard_atoms.GetNumAtoms())
            no_bit_atoms = bit.GetNumAtoms()

            # Only score bits with enough atoms to describe a vector; this
            # biases against fragments with long aliphatic chains.
            if no_bit_atoms_without_wild_card <= 1:
                continue

            scores = []
            for frag_mol in frags:
                shape_score, feat_score = _xcos_bit_vs_frag(
                    bit, frag_mol, score_threshold)
                scores.append((_xcos_fragment_name(frag_mol), shape_score,
                               no_bit_atoms, feat_score,
                               no_bit_atoms_without_wild_card))

            df = pd.DataFrame(data=scores, columns=columns)
            df['Modified_SuCOS_score'] = 0.5 * (
                df.Feat_score * df.no_bit_atoms_without_wild_card
            ) + 0.5 * (df.Shape_score * df.no_bit_atoms)

            # Keep the maximum scoring fragment row(s) for this bit.
            best_rows_per_bit.append(
                df[df['Modified_SuCOS_score'] ==
                   df['Modified_SuCOS_score'].max()])

        if best_rows_per_bit:
            final_df = pd.concat(best_rows_per_bit)

            # Score 1: the score is scaled by the number of bit atoms.
            score_1 = final_df.Modified_SuCOS_score.sum()

            # Only fragments with a score > 0 count as hits.
            final_df = final_df[final_df.Modified_SuCOS_score > 0]
            all_frags = pd.unique(final_df.Fragment)
        else:
            # No bit was scorable (pd.concat would fail on an empty list).
            score_1 = 0.0
            all_frags = []

        mol.SetProp(field_XCosRefMols, ','.join(all_frags))
        mol.SetIntProp(field_XCosNumHits, len(all_frags))
        mol.SetProp(field_XCosScore1, "{:.4f}".format(score_1))

        writer.write(mol)
        writer.flush()
Пример #9
0
def _read_reference_ligand(ref_file):
    """Return the reference molecule from a .sdf/.mol2 path or a Mol object."""
    if isinstance(ref_file, str):
        extension = os.path.splitext(ref_file)[-1]
        if extension == '.sdf':
            return Chem.MolFromMolFile(ref_file, sanitize=True)
        if extension == '.mol2':
            return Chem.MolFromMol2File(ref_file, sanitize=True)
    elif isinstance(ref_file, rdkit.Chem.rdchem.Mol):
        return ref_file
    raise ValueError("Incorrect file format for ref lig")


def _read_probe_molecules(prb_file):
    """Return the parsable probe molecules from a .sdf/.sdf.gz path or a Mol."""
    if isinstance(prb_file, str):
        stem, extension = os.path.splitext(prb_file)
        if extension == '.sdf':
            return [x for x in Chem.SDMolSupplier(prb_file, sanitize=True) if x]
        if extension == '.gz' and os.path.splitext(stem)[-1] == '.sdf':
            # Read everything while the gzip handle is open, then close it.
            with gzip.open(prb_file) as inf:
                return [
                    x for x in Chem.ForwardSDMolSupplier(inf, sanitize=True)
                    if x
                ]
    elif isinstance(prb_file, rdkit.Chem.rdchem.Mol):
        return [x for x in [prb_file] if x]
    raise ValueError("Incorrect file format for prb lig")


def main(ref_file,
         prb_file,
         write=True,
         return_all=False,
         score_mode=FeatMaps.FeatMapScoreMode.All):
    """Compute SuCOS scores of probe molecules against a reference ligand.

    Each probe molecule gets the string properties ``SuCOS_score``,
    ``Volume_score`` and ``Feature_score``, and its score is printed.

    :param ref_file: path to a .sdf or .mol2 file, or an RDKit Mol
    :param prb_file: path to a .sdf or .sdf.gz file, or an RDKit Mol
    :param write: if true, write the annotated probe molecules to
        ``<prb_file without extension>_SuCOS_score.sdf``; requires
        ``prb_file`` to be a path
    :param return_all: if true, return a tuple instead of only the score
    :param score_mode: forwarded to get_FeatureMapScore
    :return: values for the LAST probe molecule only: the SuCOS score, or
        ``(SuCOS score, feature score, volume score)`` when ``return_all``
    :raises ValueError: for an unsupported input format, an unparsable
        reference, no parsable probe molecule, or ``write=True`` with a
        non-path ``prb_file``
    """
    reflig = _read_reference_ligand(ref_file)
    if reflig is None:
        raise ValueError("Could not parse ref lig")

    prb_mols = _read_probe_molecules(prb_file)
    if not prb_mols:
        # Without this the final return would hit an unbound local.
        raise ValueError("No valid molecules found for prb lig")

    w = None
    if write:
        if not isinstance(prb_file, str):
            # The output name is derived from the probe path.
            raise ValueError(
                "write=True requires prb_file to be a file path")
        w = Chem.SDWriter("%s_SuCOS_score.sdf" % os.path.splitext(prb_file)[0])

    try:
        for prb_mol in prb_mols:
            fm_score = get_FeatureMapScore(reflig, prb_mol, score_mode)
            fm_score = np.clip(fm_score, 0, 1)

            protrude_dist = rdShapeHelpers.ShapeProtrudeDist(
                reflig, prb_mol, allowReordering=False)
            protrude_dist = np.clip(protrude_dist, 0, 1)
            SuCOS_score = 0.5 * fm_score + 0.5 * (1 - protrude_dist)

            print("********************************")
            print("SuCOS score:\t%f" % SuCOS_score)
            print("********************************")
            prb_mol.SetProp("SuCOS_score", str(SuCOS_score))
            prb_mol.SetProp("Volume_score", str(1 - protrude_dist))
            prb_mol.SetProp("Feature_score", str(fm_score))
            if w is not None:
                w.write(prb_mol)
    finally:
        # Close the writer so the output is flushed even if scoring fails.
        if w is not None:
            w.close()

    if return_all:
        return SuCOS_score, fm_score, (1 - protrude_dist)
    return SuCOS_score