Пример #1
0
def _smilarity_between_two_mols(mol1, mol2):
    """Return the Tanimoto similarity between two molecules.

    Each molecule is encoded as a 512-bit Morgan fingerprint of radius 4 and
    the two bit vectors are compared with ``DataStructs.TanimotoSimilarity``.
    """
    fingerprints = [
        rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, 4, nBits=512)
        for mol in (mol1, mol2)
    ]
    return DataStructs.TanimotoSimilarity(*fingerprints)
Пример #2
0
    def calculate_fp(smi: str, fp_type: str):
        """Compute a fingerprint for a SMILES string as a single-row array.

        Args:
            smi: SMILES string, parsed with ``Chem.MolFromSmiles``.
            fp_type: One of "ECFP4", "ECFP6", "Avalon", "MACCSkeys",
                "hashAP", "hashTT", "RDK5", "RDK6" or "RDK7".

        Returns:
            ``np.asarray(fp).reshape(1, -1)`` for the selected fingerprint,
            or ``None`` when the SMILES cannot be parsed.

        Raises:
            ValueError: If ``fp_type`` is not one of the supported names.
                (Previously this surfaced as an ``UnboundLocalError``.)
        """
        mol = Chem.MolFromSmiles(smi)
        if not mol:
            return None

        # Circular fingerprints
        if fp_type == "ECFP4":
            fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=1024)
        elif fp_type == "ECFP6":
            fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, radius=3, nBits=1024)

        # Structural fingerprints
        elif fp_type == "Avalon":
            fp = pyAvalonTools.GetAvalonFP(mol, nBits=1024)
        elif fp_type == "MACCSkeys":
            fp = rdkit.Chem.rdMolDescriptors.GetMACCSKeysFingerprint(mol)

        # Path-based fingerprints
        elif fp_type == "hashAP":
            fp = rdkit.Chem.rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol, nBits=1024)
        elif fp_type == "hashTT":
            fp = rdkit.Chem.rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(mol, nBits=1024)
        elif fp_type == "RDK5":
            fp = rdkit.Chem.rdmolops.RDKFingerprint(mol, maxPath=5, fpSize=1024, nBitsPerHash=2)
        elif fp_type == "RDK6":
            fp = rdkit.Chem.rdmolops.RDKFingerprint(mol, maxPath=6, fpSize=1024, nBitsPerHash=2)
        elif fp_type == "RDK7":
            fp = rdkit.Chem.rdmolops.RDKFingerprint(mol, maxPath=7, fpSize=1024, nBitsPerHash=2)
        else:
            # Fail with a clear message instead of an unbound-variable error.
            raise ValueError("Unsupported fingerprint type: %r" % (fp_type,))

        return np.asarray(fp).reshape(1, -1)
Пример #3
0
 def __init__(self, fp_type, fp_bits=2048):
     """
     Register the available fingerprint generators and select one of them.

     :param fp_type: fingerprint type ('morgan2', 'morgan3', 'ap' or 'rdk5')
     :param fp_bits: number of fingerprint bits
     """
     self.fp_type = fp_type
     # Each entry is [callable taking a molecule, number of bits].
     self.fp_dict = {
         'morgan2': [
             lambda m: rdmd.GetMorganFingerprintAsBitVect(m, 2, nBits=fp_bits),
             fp_bits,
         ],
         'morgan3': [
             lambda m: rdmd.GetMorganFingerprintAsBitVect(m, 3, nBits=fp_bits),
             fp_bits,
         ],
         'ap': [
             lambda m: rdmd.GetHashedAtomPairFingerprintAsBitVect(
                 m, nBits=fp_bits),
             fp_bits,
         ],
         'rdk5': [
             lambda m: Chem.RDKFingerprint(
                 m, maxPath=5, fpSize=fp_bits, nBitsPerHash=2),
             fp_bits,
         ],
     }
     # Unknown types terminate the process (exit status 0, as before).
     if not self.fp_dict.get(fp_type):
         print("invalid fingerprint type: %s" % fp_type)
         sys.exit(0)
     self.fp_function = self.fp_dict[fp_type]
def _morgan_bit_arrays(mols, nbits):
    """Return one numpy array per molecule holding its radius-2 Morgan bits."""
    arrays = []
    for mol in mols:
        fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol,
                                                            radius=2,
                                                            nBits=nbits)
        # The (1,) array is only a placeholder: RDKit's ConvertToNumpyArray
        # resizes the destination to the fingerprint length.
        arr = np.zeros((1, ), dtype=int)
        DataStructs.ConvertToNumpyArray(fp, arr)
        arrays.append(arr)
    return arrays


def mol_train_test(dataset,
                   labels,
                   test_size=0.1,
                   random_state=2019,
                   nbits=1024):
    """Parse SMILES, drop unparseable rows, split and fingerprint the data.

    Args:
        dataset: pandas object with a ``'SMILES'`` column.
        labels: pandas object aligned row-by-row with ``dataset``.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.
        nbits: Length of each Morgan (radius 2) bit-vector fingerprint.

    Returns:
        Tuple ``(dataset, labels, all_mols, y_train, y_test,
        train_fps_array, test_np_fps_array)``. ``dataset`` and ``labels``
        have rows with unparseable SMILES removed and their index reset;
        when nothing is removed they are returned untouched. The last two
        items are lists of numpy arrays, one per molecule.
    """
    # Parse every SMILES exactly once; MolFromSmiles returns None on failure.
    parsed = [
        Chem.MolFromSmiles(SMILES_string)
        for SMILES_string in dataset['SMILES']
    ]
    keep = [i for i, mol in enumerate(parsed) if mol is not None]

    # Drop failed rows by position so the result does not depend on the
    # index labels of the incoming frames.
    if len(keep) != len(parsed):
        labels = labels.iloc[keep].reset_index(drop=True)
        dataset = dataset.iloc[keep].reset_index(drop=True)

    # Reuse the molecules parsed above instead of parsing a second time.
    all_mols = [parsed[i] for i in keep]

    # Train/test split of molecules and labels.
    train_mols, test_mols, y_train, y_test = train_test_split(
        all_mols, labels, test_size=test_size, random_state=random_state)

    # Fingerprint both partitions as nbits-long bit vectors.
    train_fps_array = _morgan_bit_arrays(train_mols, nbits)
    test_np_fps_array = _morgan_bit_arrays(test_mols, nbits)

    return dataset, labels, all_mols, y_train, y_test, train_fps_array, test_np_fps_array
Пример #5
0
def maxmin_picker(dataset: list,
                  input_format='smiles',
                  n=3,
                  seed=123,
                  radius=2,
                  nBits=1024):
    """
    Select a subset of molecules and return a list of diverse RDKit mols.
    http://rdkit.blogspot.com/2014/08/optimizing-diversity-picking-in-rdkit.html

    Args:
        dataset: SMILES strings (``input_format='smiles'``) or RDKit mols
            (``input_format='mol'``).
        input_format: ``'smiles'`` or ``'mol'``.
        n: Number of molecules to pick.
        seed: Seed forwarded to the MaxMin picker.
        radius: Morgan fingerprint radius.
        nBits: Morgan fingerprint length.

    Returns:
        List of the picked RDKit mols. SMILES that fail to parse are
        skipped before picking.

    Raises:
        ValueError: If ``input_format`` is not recognized.
    """
    if input_format == 'smiles':
        # Parse each SMILES once and discard the ones RDKit rejects (None).
        parsed = (Chem.MolFromSmiles(smi) for smi in dataset)
        mols = [mol for mol in parsed if mol is not None]
    elif input_format == 'mol':
        mols = dataset
    else:
        raise ValueError('Format not recognized: %r' % (input_format,))

    fps = [
        rdMolDescriptors.GetMorganFingerprintAsBitVect(m,
                                                       radius=radius,
                                                       nBits=nBits)
        for m in mols
    ]

    mmp = SimDivFilters.MaxMinPicker()
    # Forward the seed; it was previously accepted but never used.
    ids = mmp.LazyBitVectorPick(fps, len(fps), n, seed=seed)
    return [mols[i] for i in ids]
Пример #6
0
  def _featurize(self, mol):
    """
    Calculate circular fingerprint.

    Depending on ``self.sparse`` the result is either a fixed-size bit
    vector, or a dict built from the non-zero elements of the unfolded
    fingerprint (optionally annotated with fragment SMILES when
    ``self.smiles`` is set).

    Parameters
    ----------
    mol : RDKit Mol
        Molecule.
    """
    if not self.sparse:
      return rdMolDescriptors.GetMorganFingerprintAsBitVect(
          mol, self.radius, nBits=self.size, useChirality=self.chiral,
          useBondTypes=self.bonds, useFeatures=self.features)

    bit_info = {}
    sparse_fp = rdMolDescriptors.GetMorganFingerprint(
        mol, self.radius, useChirality=self.chiral,
        useBondTypes=self.bonds, useFeatures=self.features,
        bitInfo=bit_info)
    counts = sparse_fp.GetNonzeroElements()  # dict: fragment id -> count
    if not self.smiles:
      return counts

    # Attach a SMILES string to every fragment, using the first recorded
    # (atom, radius) environment for that fragment id.
    annotated = {}
    for fragment_id, count in counts.items():
      center, env_radius = bit_info[fragment_id][0]
      atom_env = Chem.FindAtomEnvironmentOfRadiusN(mol, env_radius, center)
      submol = Chem.PathToSubmol(mol, atom_env)
      annotated[fragment_id] = {
          'smiles': Chem.MolToSmiles(submol),
          'count': count,
      }
    return annotated
Пример #7
0
def calculate_fingerprint(smiles, radi):
    """Build per-radius Morgan environment count vectors for a SMILES.

    The molecule is parsed, explicit hydrogens are added, and a 2048-bit
    Morgan fingerprint is generated for every radius ``0..radi``. For each
    set bit, the number of ``bitInfo`` entries recorded at exactly that
    radius is stored.

    Args:
        smiles: SMILES string of the molecule.
        radi: Largest Morgan radius to include.

    Returns:
        Tuple ``(formula, binary)``: ``formula`` has shape ``(1, 2048)`` and
        holds the radius-0 counts; ``binary`` has shape ``(1, 2048 * radi)``
        with one 2048-wide slice per radius ``1..radi``.
    """
    n_bits = 2048
    binary = np.zeros((n_bits * (radi)), int)
    formula = np.zeros((n_bits), int)
    mol = Chem.MolFromSmiles(smiles)

    mol = Chem.AddHs(mol)
    mol_bi = {}
    for r in range(radi + 1):
        mol_fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol,
                                                                radius=r,
                                                                bitInfo=mol_bi,
                                                                nBits=n_bits)
        for i in mol_fp.GetOnBits():
            # Count only the environments whose radius is exactly r; bits
            # without such an environment keep their zero.
            count = sum(1 for entry in mol_bi[i] if entry[1] == r)
            if not count:
                continue
            if r == 0:
                formula[i] = count
            else:
                binary[(n_bits * (r - 1)) + i] = count

    # Size the reshape from radi; the former literal 4096 only fit radi == 2.
    return formula.reshape(1, n_bits), binary.reshape(1, n_bits * radi)
Пример #8
0
def smiles2fp(smiles, radius=2, n_bits=1024):
    """Return the Morgan bit-vector fingerprint of a SMILES string.

    If fingerprint generation raises, the exception is printed and ``-1``
    is returned instead.
    """
    mol = Chem.MolFromSmiles(smiles)
    try:
        fingerprint = desc.GetMorganFingerprintAsBitVect(mol, radius, n_bits)
    except Exception as err:
        print(err)
        fingerprint = -1
    return fingerprint
Пример #9
0
    def testDrawMorgan(self):
        """DrawMorganBit must match DrawMorganEnv for bit 872 of the test mol."""
        mol = Chem.MolFromSmiles('c1ccccc1CC1CC1')
        bit_info = {}
        rdMolDescriptors.GetMorganFingerprintAsBitVect(mol,
                                                       radius=2,
                                                       bitInfo=bit_info)
        self.assertTrue(872 in bit_info)

        # Default drawing: all three fill colours are expected in the SVG.
        bit_svg = Draw.DrawMorganBit(mol, 872, bit_info)
        center_atom, env_radius = bit_info[872][0]
        env_svg = Draw.DrawMorganEnv(mol, center_atom, env_radius)
        self.assertEqual(bit_svg, env_svg)
        for fill_style in ("style='fill:#CCCCCC;", "style='fill:#E5E533;",
                           "style='fill:#9999E5;"):
            self.assertTrue(fill_style in bit_svg)

        # With centerColor=None the #9999E5 fill must be absent.
        bit_svg = Draw.DrawMorganBit(mol, 872, bit_info, centerColor=None)
        center_atom, env_radius = bit_info[872][0]
        env_svg = Draw.DrawMorganEnv(mol,
                                     center_atom,
                                     env_radius,
                                     centerColor=None)
        self.assertEqual(bit_svg, env_svg)
        for fill_style in ("style='fill:#CCCCCC;", "style='fill:#E5E533;"):
            self.assertTrue(fill_style in bit_svg)
        self.assertFalse("style='fill:#9999E5;" in bit_svg)

        # A bit id that is not in bit_info must raise KeyError.
        with self.assertRaises(KeyError):
            Draw.DrawMorganBit(mol, 32, bit_info)

        if hasattr(Draw, 'MolDraw2DCairo'):
            # Github #3796: make sure we aren't trying to generate metadata:
            png_bytes = Draw.DrawMorganBit(mol, 872, bit_info, useSVG=False)
            self.assertIn(b'PNG', png_bytes)
            self.assertIsNone(Chem.MolFromPNGString(png_bytes))
Пример #10
0
def GenerateMorganFeaturesFingerprints(Mols):
    """Generate MorganFeatures fingerprints.

    Returns a list with one fingerprint per molecule: bit vectors of size
    FPSize when the specified fingerprints type matches "BitVect"
    (case-insensitive), otherwise the output of GetMorganFingerprint.
    """

    FingerprintsType = OptionsInfo["SpecifiedFingerprintsType"]
    MiscUtil.PrintInfo("\nGenerating MorganFeatures %s fingerprints..." %
                       FingerprintsType)

    # Setup fingerprints parameters...
    Params = OptionsInfo["FingerprintsParams"]["MorganFeatures"]
    Radius = Params["Radius"]
    UseChirality = Params["UseChirality"]
    FPSize = Params["FPSize"]

    if not re.match("^BitVect$", FingerprintsType, re.I):
        # Generate UIntSparseIntVect fingerprints...
        return [
            rdMolDescriptors.GetMorganFingerprint(Mol,
                                                  Radius,
                                                  useFeatures=True,
                                                  useChirality=UseChirality)
            for Mol in Mols
        ]

    # Generate ExplicitBitVect fingerprints...
    MiscUtil.PrintInfo("FPSize: %s" % (FPSize))
    return [
        rdMolDescriptors.GetMorganFingerprintAsBitVect(
            Mol,
            Radius,
            useFeatures=True,
            useChirality=UseChirality,
            nBits=FPSize) for Mol in Mols
    ]
Пример #11
0
def GenerateMorganFeaturesFingerprints(Mols):
    """Generate MorganFeatures fingerprints.

    Returns a list with one fingerprint per molecule: 2048-bit vectors when
    the "GenerateBitVectFingerints" option is truthy, otherwise the output
    of GetMorganFingerprint.
    """

    MiscUtil.PrintInfo("\nGenerating  MorganFeatures fingerprints...")

    # Setup fingerprints parameters...
    Params = OptionsInfo["FingerprintsParams"]["MorganFeatures"]
    Radius = Params["Radius"]
    UseChirality = Params["UseChirality"]

    if not OptionsInfo["GenerateBitVectFingerints"]:
        # Generate UIntSparseIntVect fingerprints...
        return [
            rdMolDescriptors.GetMorganFingerprint(Mol,
                                                  Radius,
                                                  useFeatures=True,
                                                  useChirality=UseChirality)
            for Mol in Mols
        ]

    # Generate ExplicitBitVect fingerprints...
    return [
        rdMolDescriptors.GetMorganFingerprintAsBitVect(
            Mol,
            Radius,
            useFeatures=True,
            useChirality=UseChirality,
            nBits=2048) for Mol in Mols
    ]
Пример #12
0
def dmat_sim(smiles_target, ntopick=10):
    """
    Pick the most dissimilar compounds from a given set.

    Adapted from:
        http://rdkit.blogspot.com/2014/08/optimizing-diversity-picking-in-rdkit.html

    Args:
        smiles_target: DataFrame of compound-target activity pairs with the
        compounds given as SMILES strings in a column named "smiles". Its
        index is reset in place.
        ntopick: The number of dissimilar compounds to pick.

    Returns:
        The rows of smiles_target selected by the MaxMin picker.
    """
    smiles_target.reset_index(drop=True, inplace=True)
    fingerprints = [
        rdMolDescriptors.GetMorganFingerprintAsBitVect(MolFromSmiles(smi), 2)
        for smi in smiles_target['smiles']
    ]
    n_fps = len(fingerprints)

    # Flattened lower triangle of the pairwise Tanimoto distance matrix:
    # row i contributes the distances from fingerprint i to fingerprints 0..i-1.
    distances = [
        dist
        for row in range(1, n_fps)
        for dist in DataStructs.BulkTanimotoSimilarity(fingerprints[row],
                                                       fingerprints[:row],
                                                       returnDistance=True)
    ]

    picker = SimDivFilters.MaxMinPicker()
    picked = picker.Pick(np.array(distances), n_fps, ntopick)
    return smiles_target.iloc[list(picked)]
Пример #13
0
    def _compute_fps(self) -> None:
        """Fill ``self.fps`` with one fingerprint row per molecule.

        Iterates ``self.data.mol``, builds a Morgan or RDKit fingerprint
        according to ``self.fp_type`` and stores the bits in a
        ``(n_molecules, self.fp_bits)`` numpy array.
        """
        rows = []
        progress = tqdm.tqdm(self.data.mol,
                             desc='Computing fingerprints',
                             disable=self.prog)
        for mol in progress:
            if self.fp_type == 'morgan':
                bit_vect = rdMolDescriptors.GetMorganFingerprintAsBitVect(
                    mol, self.fp_rad, self.fp_bits)

            if self.fp_type == 'rdkit':
                bit_vect = Chem.RDKFingerprint(
                    mol,
                    minPath=self.fp_rad,
                    maxPath=self.fp_rad,
                    fpSize=self.fp_bits,
                )

            row = np.zeros((0, ), dtype=np.int8)
            DataStructs.ConvertToNumpyArray(bit_vect, row)
            rows.append(row)

        # Copy the per-molecule rows into one 2-D array.
        self.fps = np.zeros((len(rows), self.fp_bits))
        for idx, row in enumerate(rows):
            self.fps[idx, :] = row
Пример #14
0
 def calculateMol(self, m, smiles, internalParsing=False):
     """Return the feature-invariant Morgan bit vector of ``m`` as a list.

     ``smiles`` and ``internalParsing`` are accepted but not used here.
     """
     bit_vect = rd.GetMorganFingerprintAsBitVect(
         m,
         radius=self.radius,
         nBits=self.nbits,
         invariants=rd.GetFeatureInvariants(m))
     bits = list(bit_vect)
     return bits
Пример #15
0
    def testDrawMorgan(self):
        """DrawMorganBit must match DrawMorganEnv for bit 872 of the test mol."""
        from rdkit.Chem import rdMolDescriptors
        mol = Chem.MolFromSmiles('c1ccccc1CC1CC1')
        bit_info = {}
        rdMolDescriptors.GetMorganFingerprintAsBitVect(mol,
                                                       radius=2,
                                                       bitInfo=bit_info)
        self.assertTrue(872 in bit_info)

        # Default drawing: all three fill colours are expected in the SVG.
        bit_svg = Draw.DrawMorganBit(mol, 872, bit_info)
        center_atom, env_radius = bit_info[872][0]
        env_svg = Draw.DrawMorganEnv(mol, center_atom, env_radius)
        self.assertEqual(bit_svg, env_svg)
        for fill_style in ("style='fill:#CCCCCC;", "style='fill:#E5E533;",
                           "style='fill:#9999E5;"):
            self.assertTrue(fill_style in bit_svg)

        # With centerColor=None the #9999E5 fill must be absent.
        bit_svg = Draw.DrawMorganBit(mol, 872, bit_info, centerColor=None)
        center_atom, env_radius = bit_info[872][0]
        env_svg = Draw.DrawMorganEnv(mol,
                                     center_atom,
                                     env_radius,
                                     centerColor=None)
        self.assertEqual(bit_svg, env_svg)
        for fill_style in ("style='fill:#CCCCCC;", "style='fill:#E5E533;"):
            self.assertTrue(fill_style in bit_svg)
        self.assertFalse("style='fill:#9999E5;" in bit_svg)

        # A bit id that is not in bit_info must raise KeyError.
        with self.assertRaises(KeyError):
            Draw.DrawMorganBit(mol, 32, bit_info)
Пример #16
0
def smi2vec(smi):
    """Return the radius-4 Morgan fingerprint of a SMILES as a list of bits.

    The fingerprint length comes from the module-level ``n_bit``.
    """
    fingerprint = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        Chem.MolFromSmiles(smi), 4, nBits=n_bit)
    return [fingerprint[pos] for pos in range(n_bit)]
def calc_morgan_fp(smiles):
    """Return the Morgan fingerprint of a SMILES as a float32 numpy array.

    Radius and length come from the module-level ``RADIUS`` and ``FP_SIZE``.
    """
    fingerprint = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        Chem.MolFromSmiles(smiles), RADIUS, nBits=FP_SIZE)
    bits = np.zeros((0, ), dtype=np.float32)
    Chem.DataStructs.ConvertToNumpyArray(fingerprint, bits)
    return bits
Пример #18
0
def reward_target_molecule_similarity(mol,
                                      target,
                                      radius=2,
                                      nBits=2048,
                                      useChirality=True):
    """
    Reward based on the Tanimoto similarity between the Morgan (ECFP)
    bit-vector fingerprints of a molecule and a target molecule.
    :param mol: rdkit mol object
    :param target: rdkit mol object
    :param radius: Morgan fingerprint radius
    :param nBits: fingerprint length
    :param useChirality: forwarded to the fingerprint generator
    :return: float, [0.0, 1.0]
    """
    fp_options = dict(radius=radius, nBits=nBits, useChirality=useChirality)
    mol_fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, **fp_options)
    target_fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        target, **fp_options)
    return DataStructs.TanimotoSimilarity(mol_fp, target_fp)
Пример #19
0
def getFingerprintFromMolecule(moles, nBits=2048):
    """Return a numpy array with one radius-2 Morgan fingerprint row per mol."""

    def _to_array(bit_vect):
        # Placeholder array that ConvertToNumpyArray fills with the bits.
        out = np.zeros((1, ))
        DataStructs.cDataStructs.ConvertToNumpyArray(bit_vect, out)
        return out

    bit_vects = [
        rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, 2, nBits=nBits)
        for mol in moles
    ]
    return np.array([_to_array(bit_vect) for bit_vect in bit_vects])
Пример #20
0
 def morgan_fingerprinter(mol):
     """Return the Morgan bit vector of ``mol`` converted by ``_fp_to_bytes``.

     Radius, size and the use* flags are taken from the enclosing scope.
     """
     options = dict(nBits=fpSize,
                    useChirality=useChirality,
                    useBondTypes=useBondTypes,
                    useFeatures=useFeatures)
     bit_vect = rdMolDescriptors.GetMorganFingerprintAsBitVect(
         mol, radius, **options)
     return _fp_to_bytes(bit_vect)
Пример #21
0
def features_ext(smile_string, radius=2, nBits=256):
    """Return the Morgan bit-vector fingerprint of a SMILES as a numpy array."""
    mol = Chem.rdmolfiles.MolFromSmiles(smile_string)
    fingerprint = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        mol, radius=radius, bitInfo={}, nBits=nBits)
    return np.array(fingerprint)
Пример #22
0
def pred_(model, string_mol, inputopt):
    """Predict functional-group labels for one molecule.

    Args:
        model: Object whose ``predict(array, batch_size=..., verbose=...)``
            returns one row of 20 scores (one per label).
        string_mol: Molecule as an InChI or SMILES string.
        inputopt: ``'InChI'`` or ``'SMILES'``; selects the parser.

    Returns:
        Tuple ``(result_display, result_confidence)``. ``result_display`` is
        a comma-separated string of the labels whose score rounds to 1, or
        ``'No functional groups found'``. ``result_confidence`` is a 2-column
        array of (label, percentage string) sorted by descending score.

    Raises:
        ValueError: If ``inputopt`` is unsupported or the molecule string
            cannot be parsed.
    """
    # Reject unknown formats up front; otherwise ``molecule`` is never bound.
    if inputopt not in ('InChI', 'SMILES'):
        raise ValueError("Unsupported input option %r; expected 'InChI' or "
                         "'SMILES'" % (inputopt,))

    # Labels
    labels = np.array([
        'Alcohol', 'Aldehyde', 'Alicycle', 'Amide', 'Aromatic', 'Carbocycle',
        'Carboxylic acid', 'Chiral', 'Ester', 'Ether', 'Fused rings', 'Ketone',
        'Lactame', 'Metal-organic', 'Nitrogen heterocycle',
        'Oxygen heterocycle', 'Sulfide', 'Sulfur heterocycle', 'Thiol', 'Urea'
    ])

    # Read molecule input
    if inputopt == 'InChI':
        molecule = Chem.inchi.MolFromInchi(string_mol)
    else:
        molecule = Chem.MolFromSmiles(string_mol)
    if molecule is None:
        raise ValueError("Could not parse %s string: %r" %
                         (inputopt, string_mol))

    # Convert input molecule to descriptors (1024-bit Morgan, radius 2)
    morganFP = rdMolDescriptors.GetMorganFingerprintAsBitVect(molecule,
                                                              radius=2,
                                                              nBits=1024)
    # Placeholder array that ConvertToNumpyArray fills with the bits.
    morganFP_array = np.zeros((1, ), dtype=int)
    DataStructs.ConvertToNumpyArray(morganFP, morganFP_array)

    # Classification on a batch holding just this molecule
    prediction = model.predict(np.array([morganFP_array]),
                               batch_size=1,
                               verbose=1)

    result = pd.DataFrame(prediction, columns=labels)
    result_bin = result.round(0).astype(int)
    result_labels = result_bin.astype(bool).to_numpy().tolist()

    result_confidence = result.to_numpy()

    # Display Result: boolean-mask the label names with the rounded scores
    result_display = labels[tuple(result_labels)]
    result_confidence_float = list(result_confidence[0])  # used for sorting

    # Transforming into percentages
    result_confidence = [format(n, '.2%') for n in result_confidence_float]

    # Formatting results
    result_display = ', '.join(result_display)
    if result_display == '':
        result_display = 'No functional groups found'
    result_confidence = np.column_stack((labels, result_confidence))  # Stack
    # Order descending by raw score
    result_confidence = result_confidence[np.argsort(result_confidence_float)
                                          [::-1]]

    return result_display, result_confidence
Пример #23
0
def fingerprint_features(smile_string, radius=2, size=2048):
    """Return a chirality-aware Morgan bit vector for a SMILES string.

    The parsed molecule is renumbered with the output of
    ``CanonicalRankAtoms`` before the fingerprint is computed.
    """
    parsed = MolFromSmiles(smile_string)
    atom_ranks = rdmolfiles.CanonicalRankAtoms(parsed)
    renumbered = rdmolops.RenumberAtoms(parsed, atom_ranks)
    fp_options = dict(nBits=size,
                      useChirality=True,
                      useBondTypes=True,
                      useFeatures=False)
    return rdMolDescriptors.GetMorganFingerprintAsBitVect(
        renumbered, radius, **fp_options)
Пример #24
0
def smiles_to_fingerprint_bin(smiles, trust_smiles=False):
    """Return the radius-2 Morgan fingerprint of a SMILES as binary text.

    With ``trust_smiles`` set, sanitization is skipped and only the property
    cache and ring information are refreshed before fingerprinting. Returns
    ``None`` when the SMILES cannot be parsed. The fingerprint length comes
    from the module-level ``BITCOUNT``.
    """
    sanitize = not trust_smiles
    mol = Chem.MolFromSmiles(smiles, sanitize=sanitize)
    if mol is None:
        return None
    if not sanitize:
        mol.UpdatePropertyCache()
        Chem.FastFindRings(mol)
    return DataStructs.BitVectToBinaryText(
        rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, 2, BITCOUNT))
Пример #25
0
 def smiles_to_ecfp_list(smi_list):
     """Return one list of 512 integer bits (Morgan, radius 6) per SMILES."""
     from rdkit.Chem import rdMolDescriptors

     def _bits(smi):
         bit_string = rdMolDescriptors.GetMorganFingerprintAsBitVect(
             Chem.MolFromSmiles(smi), radius=6, nBits=512).ToBitString()
         return [int(char) for char in bit_string]

     return [_bits(smi) for smi in smi_list]
Пример #26
0
    def draw_fragment(self,
                      fragment_id: Union[str, int],
                      show_zscore: bool = True) -> str:
        """Draw a specified fragment.

        Args:
            fragment_id (Union[str, int]): User-defined fragment string, or position of the
                Morgan fingerprint bit to be drawn.
            show_zscore (bool, optional): Annotate drawing with zscore. Defaults to True.

        Returns:
            str: Molecule drawing SVG for fingerprint bits. For user-defined
            fragments, whatever ``Draw.MolsToGridImage`` returns (may not be
            a string; TODO confirm).

        Raises:
            ValueError: If ``self.fp_type`` is neither "morgan" nor "rdkit"
                when drawing an auto-generated fragment.
        """

        # images will be annotated with zscore
        legend = f"zscore = {self.zscores[fragment_id]:.2f}" if show_zscore else ""

        # handle drawing of user-defined fragments
        if self.user_frags:
            mol = Chem.MolFromSmarts(fragment_id)
            # Return here: the fingerprint-bit code below expects a bit
            # position, not a fragment string.
            return Draw.MolsToGridImage([mol],
                                        molsPerRow=1,
                                        subImgSize=(200, 200),
                                        legends=[legend])

        # handle drawing of auto-generated fragments
        mol = self._get_mol_with_frag(fragment_id)

        bit_info = {}
        if self.fp_type == "morgan":
            # The fingerprint is computed only to populate bit_info.
            rdMolDescriptors.GetMorganFingerprintAsBitVect(
                mol, radius=self.fp_rad, nBits=self.fp_bits, bitInfo=bit_info)

            return Draw.DrawMorganBit(mol,
                                      fragment_id,
                                      bit_info,
                                      useSVG=True,
                                      legend=legend)

        if self.fp_type == "rdkit":
            Chem.RDKFingerprint(
                mol,
                minPath=self.fp_rad,
                maxPath=self.fp_rad,
                fpSize=self.fp_bits,
                bitInfo=bit_info,
            )

            return Draw.DrawRDKitBit(mol,
                                     fragment_id,
                                     bit_info,
                                     useSVG=True,
                                     legend=legend)

        # Previously this case failed with an unbound ``img`` variable.
        raise ValueError(f"Unsupported fingerprint type: {self.fp_type!r}")
Пример #27
0
  def _featurize(self, datapoint: RDKitMol, **kwargs) -> np.ndarray:
    """Calculate circular fingerprint.

    Parameters
    ----------
    datapoint: rdkit.Chem.rdchem.Mol
      RDKit Mol object
    **kwargs
      Only the deprecated ``mol`` keyword is read; when present it replaces
      ``datapoint`` and a ``DeprecationWarning`` is issued.

    Returns
    -------
    np.ndarray
      A numpy array of circular fingerprint. When ``self.sparse`` is set a
      dict is returned instead: fragment id -> count, or fragment id ->
      ``{'smiles', 'count'}`` when ``self.smiles`` is also set.
    """
    try:
      from rdkit import Chem
      from rdkit.Chem import rdMolDescriptors
    except ModuleNotFoundError:
      raise ImportError("This class requires RDKit to be installed.")
    if 'mol' in kwargs:
      import warnings
      datapoint = kwargs.get("mol")
      # Warn rather than raise, so the deprecated keyword is actually used;
      # raising here made the assignment above dead code.
      warnings.warn(
          'Mol is being phased out as a parameter, please pass "datapoint" instead.',
          DeprecationWarning,
          stacklevel=2)
    if self.sparse:
      info: Dict = {}
      fp = rdMolDescriptors.GetMorganFingerprint(
          datapoint,
          self.radius,
          useChirality=self.chiral,
          useBondTypes=self.bonds,
          useFeatures=self.features,
          bitInfo=info)
      fp = fp.GetNonzeroElements()  # convert to a dict

      # generate SMILES for fragments
      if self.smiles:
        fp_smiles = {}
        for fragment_id, count in fp.items():
          # Use the first recorded (atom, radius) environment of the fragment.
          root, radius = info[fragment_id][0]
          env = Chem.FindAtomEnvironmentOfRadiusN(datapoint, radius, root)
          frag = Chem.PathToSubmol(datapoint, env)
          smiles = Chem.MolToSmiles(frag)
          fp_smiles[fragment_id] = {'smiles': smiles, 'count': count}
        fp = fp_smiles
    else:
      fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(
          datapoint,
          self.radius,
          nBits=self.size,
          useChirality=self.chiral,
          useBondTypes=self.bonds,
          useFeatures=self.features)
      fp = np.asarray(fp, dtype=float)
    return fp
Пример #28
0
 def get_input_features(self, mol):
     """Return the Morgan bit-vector fingerprint of ``mol`` as float32.

     The fingerprint uses ``self.radius`` and the generator's default size.

     Raises:
         MolFeatureExtractionError: If fingerprint generation fails for
             any reason; the original exception is logged at debug level.
     """
     try:
         fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(
             mol, self.radius)
     except Exception as e:
         logger = getLogger(__name__)
         # The %s placeholder is required: without it the extra argument
         # causes a logging formatting error when debug is enabled.
         logger.debug('exception caught at ECFPPreprocessor: %s', e)
         # Extracting feature failed
         raise MolFeatureExtractionError
     # TODO(Nakago): Test it.
     return numpy.asarray(fp, numpy.float32)
def get_morgan(molecule, length=512):
    """Return the radius-2 Morgan bit vector of ``molecule``.

    Any exception is printed together with the molecule and ``np.nan`` is
    returned instead of a fingerprint.
    """
    try:
        # radius=2 = ECFP4, radius=3 = ECFP6, etc.
        return rdMolDescriptors.GetMorganFingerprintAsBitVect(molecule,
                                                              2,
                                                              nBits=length)
    except Exception as err:
        print(err)
        print('error ' + str(molecule))
        return np.nan
Пример #30
0
def index_row(key, smiles):
    """Build an index record holding the base64 Morgan fingerprint of a SMILES.

    Returns a dict with keys ``'key'``, ``'morgan_fp'`` and ``'error'``. On
    any exception ``'morgan_fp'`` stays empty and ``'error'`` describes the
    failure; on success ``'error'`` is the empty string.
    """
    row = {'key': key, 'morgan_fp': '', 'error': ''}
    try:
        mol = Chem.MolFromSmiles(smiles)
        fingerprint = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol,
                                                                     radius=2)
        row['morgan_fp'] = fingerprint.ToBase64()
    except Exception as exc:
        row['error'] = f'Exception {exc} processing {smiles}'
    return row