Пример #1
0
    def add_atom(self, mol):
        """Apply one randomly selected insertion reaction to ``mol``.

        With probability 0.63 a reaction is drawn from
        ``self.rxn_smarts_ring_list``. If ``mol`` does not match the SMARTS
        ``[r3,r4,r5]`` or has zero aliphatic rings, that draw is replaced by
        one from ``self.rxn_smarts_make_ring``, and with probability 0.036
        every ``!`` is removed from the chosen SMARTS (the original author
        labels this "starting a fused ring").

        Otherwise, a molecule matching ``[*]1=[*]-[*]=[*]-1`` gets the fixed
        reaction ``[r4:1][r4:2]>>[*:1]C[*:2]``; any other molecule gets a
        draw from ``self.rxn_smarts_list``.

        Arguments:
            mol: molecule to modify (an RDKit molecule, judging by the calls
                made on it).
        Returns:
            The result of ``self.run_rxn`` if ``self.valences_not_too_large``
            accepts it, otherwise a copy of the input taken before the
            reaction was run.
        """
        # Snapshot taken up front so a rejected product can be discarded.
        fallback = AllChem.Mol(mol)

        if np.random.random() >= 0.63:
            # Non-ring branch.
            four_ring_diene = AllChem.MolFromSmarts('[*]1=[*]-[*]=[*]-1')
            if mol.HasSubstructMatch(four_ring_diene):
                reaction = '[r4:1][r4:2]>>[*:1]C[*:2]'
            else:
                reaction = np.random.choice(self.rxn_smarts_list, p=self.p)
        else:
            # Ring branch: default to a reaction that adds a ring atom.
            reaction = np.random.choice(self.rxn_smarts_ring_list,
                                        p=self.p_ring)
            small_ring = AllChem.MolFromSmarts('[r3,r4,r5]')
            can_extend_ring = (
                mol.HasSubstructMatch(small_ring)
                and AllChem.CalcNumAliphaticRings(mol) != 0
            )
            if not can_extend_ring:
                # No suitable ring to extend, so pick a ring-forming reaction.
                reaction = np.random.choice(self.rxn_smarts_make_ring,
                                            p=self.p_make_ring)
                # probability of starting a fused ring
                if np.random.random() < 0.036:
                    reaction = reaction.replace("!", "")

        product = self.run_rxn(reaction, mol)
        return product if self.valences_not_too_large(product) else fallback
Пример #2
0
def test_monomer_importer(json_importer, independent_importers):
    """Import monomers and verify the fields of every stored record.

    The test runs ``MonomerImporter.import_data`` and reloads the monomers by
    the returned ids. It expects exactly four records and checks each one
    against values recomputed from its RDKit molecule and against the source
    documents loaded through ``json_importer``.

    Arguments:
        json_importer: fixture handed to ``MonomerImporter`` and used to load
            the source monomer documents.
        independent_importers: fixture that is not referenced in the body;
            presumably requested for its setup side effects (confirm in the
            fixture definition).
    """
    monomer_importer = importers.MonomerImporter(json_importer)
    ids = monomer_importer.import_data()

    monomer_repo = repo.create_monomer_repository()
    backbone_repo = repo.create_backbone_repository()
    monomer_data = list(monomer_repo.load(ids))
    monomer_docs = json_importer.load(monomer_importer.saver.TYPE.STRING)
    backbone_data = list(backbone_repo.load())
    kekules = [doc['kekule'] for doc in monomer_docs]
    backbones = [mol.to_reduced_dict() for mol in backbone_data]

    assert len(monomer_data) == 4
    for mol in monomer_data:
        rdkit_mol = mol.mol
        # Identity comparison is the correct way to test against None.
        assert mol._id is not None
        assert mol.required == bool(AllChem.CalcNumAromaticRings(rdkit_mol))
        assert mol.backbone in backbones
        assert mol.sidechain is None
        assert mol.connection is None
        assert mol.proline == bool(
            AllChem.CalcNumAliphaticRings(rdkit_mol)
            and rdkit_mol.HasSubstructMatch(PROLINE_N_TERM))
        # Equality rather than truthiness: a merely truthy value must not pass.
        assert mol.imported == True  # noqa: E712
        assert mol.kekule in kekules
        # Consume the matched entry so a duplicated kekule cannot match twice.
        kekules.remove(mol.kekule)
Пример #3
0
def PhyChem(smiles):
    """Compute a scaled 19-column physicochemical descriptor matrix.

    Each SMILES string is parsed with ``Chem.MolFromSmiles`` and described by
    19 values, in this column order: molecular weight, logP, H-bond
    acceptors, H-bond donors, rotatable bonds, amide bonds, bridgehead atoms,
    heteroatoms, heavy atoms, spiro atoms, fraction of sp3 carbons, ring
    count, aliphatic rings, aromatic rings, saturated rings, heterocycles,
    TPSA, valence electrons and molar refractivity.

    If any descriptor call raises, the offending SMILES is printed and its
    row is filled with zeros.

    Arguments:
        smiles (list): list of SMILES strings.
    Returns:
        props (ndarray): m X 19 matrix produced by ``Scaler().fit_transform``
            on the raw descriptors, where m is the number of samples.
            ``Scaler`` is defined elsewhere in the module (presumably a
            scikit-learn scaler; confirm at the import site).
    """
    n_columns = 19
    rows = []
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        try:
            # The order of this list defines the output column order.
            row = [
                desc.MolWt(mol),
                Crippen.MolLogP(mol),
                Lipinski.NumHAcceptors(mol),
                Lipinski.NumHDonors(mol),
                Lipinski.NumRotatableBonds(mol),
                AllChem.CalcNumAmideBonds(mol),
                AllChem.CalcNumBridgeheadAtoms(mol),
                Lipinski.NumHeteroatoms(mol),
                Lipinski.HeavyAtomCount(mol),
                AllChem.CalcNumSpiroAtoms(mol),
                AllChem.CalcFractionCSP3(mol),
                Lipinski.RingCount(mol),
                AllChem.CalcNumAliphaticRings(mol),
                AllChem.CalcNumAromaticRings(mol),
                AllChem.CalcNumSaturatedRings(mol),
                AllChem.CalcNumHeterocycles(mol),
                MolSurf.TPSA(mol),
                desc.NumValenceElectrons(mol),
                Crippen.MolMR(mol),
            ]
        except Exception:
            # Best effort: report the failing SMILES and keep the row count.
            print(smi)
            row = [0] * n_columns
        rows.append(row)

    raw = np.array(rows)
    return Scaler().fit_transform(raw)
Пример #4
0
def add_atom(mol):
    """Run one randomly selected insertion reaction on ``mol``.

    With probability 0.63 a reaction is drawn from ``rxn_smarts_ring_list``.
    If ``mol`` does not match the SMARTS ``[r3,r4,r5]`` or has zero aliphatic
    rings, the draw is replaced by one from ``rxn_smarts_make_ring``, and
    with probability 0.056 every ``!`` is removed from it (labelled
    "starting a fused ring" by the original author).

    Otherwise, a molecule matching ``[*]1=[*]-[*]=[*]-1`` gets the fixed
    reaction ``[r4:1][r4:2]>>[*:1]C[*:2]``; any other molecule gets a draw
    from ``rxn_smarts_list``.

    Arguments:
        mol: molecule to modify (an RDKit molecule, judging by the calls
            made on it).
    Returns:
        Whatever ``run_rxn`` returns for the chosen reaction and ``mol``.
    """
    if np.random.random() >= 0.63:
        # Non-ring branch.
        four_ring_diene = Chem.MolFromSmarts('[*]1=[*]-[*]=[*]-1')
        if mol.HasSubstructMatch(four_ring_diene):
            reaction = '[r4:1][r4:2]>>[*:1]C[*:2]'
        else:
            reaction = np.random.choice(rxn_smarts_list, p=p)
    else:
        # probability of adding ring atom
        reaction = np.random.choice(rxn_smarts_ring_list, p=p_ring)
        small_ring = Chem.MolFromSmarts('[r3,r4,r5]')
        can_extend_ring = (
            mol.HasSubstructMatch(small_ring)
            and AllChem.CalcNumAliphaticRings(mol) != 0
        )
        if not can_extend_ring:
            reaction = np.random.choice(rxn_smarts_make_ring, p=p_make_ring)
            # probability of starting a fused ring
            if np.random.random() < 0.056:
                reaction = reaction.replace("!", "")

    return run_rxn(reaction, mol)
Пример #5
0
def properties(fnames, labels, is_active=False):
    """Tabulate six structural counts for every molecule in each given file.

    The counts are the numbers of hydrogen bond acceptors, hydrogen bond
    donors, rotatable bonds, aliphatic rings, aromatic rings and
    heterocycles.

    Each file is read with ``pd.read_table``. If it has a ``SCORE`` column,
    rows are kept when SCORE > 0.5 (``is_active``) or SCORE > 0 (otherwise).
    If not, and it has a ``PCHEMBL_VALUE`` column, rows are kept when
    PCHEMBL_VALUE >= 6.5 (``is_active``) or >= 0 (otherwise). Rows are then
    de-duplicated on ``CANONICAL_SMILES``, and files with more than 100000
    remaining rows are randomly sampled down to 100000.

    Arguments:
        fnames (list): the file paths of the molecule tables.
        labels (list): the label for each file in ``fnames``, matched by
            position.
        is_active (bool, optional): use the stricter activity thresholds
            described above. (Default: False)

    Returns:
        df (DataFrame): long-format table with three columns; 'Set' is the
            label of the file the molecule came from, 'Property' is the
            property name, 'Number' is the property value.
    """
    max_rows = int(1e5)
    score_cutoff = 0.5 if is_active else 0
    pchembl_cutoff = 6.5 if is_active else 0
    # Display names, in the same order as the counts computed per molecule.
    names = (
        'Hydrogen Bond\nAcceptor',
        'Hydrogen\nBond Donor',
        'Rotatable\nBond',
        'Aliphatic\nRing',
        'Aromatic\nRing',
        'Heterocycle',
    )

    records = []
    for idx, path in enumerate(fnames):
        table = pd.read_table(path)
        if 'SCORE' in table.columns:
            table = table[table.SCORE > score_cutoff]
        elif 'PCHEMBL_VALUE' in table.columns:
            table = table[table.PCHEMBL_VALUE >= pchembl_cutoff]
        table = table.drop_duplicates(subset='CANONICAL_SMILES')
        if len(table) > max_rows:
            table = table.sample(max_rows)

        for smi in tqdm(table.CANONICAL_SMILES):
            mol = Chem.MolFromSmiles(smi)
            counts = (
                Lipinski.NumHAcceptors(mol),
                Lipinski.NumHDonors(mol),
                Lipinski.NumRotatableBonds(mol),
                AllChem.CalcNumAliphaticRings(mol),
                Lipinski.NumAromaticRings(mol),
                AllChem.CalcNumHeterocycles(mol),
            )
            records.extend(
                [labels[idx], name, count]
                for name, count in zip(names, counts)
            )

    return pd.DataFrame(records, columns=['Set', 'Property', 'Number'])
Пример #6
0
def add_atom(rdkit_mol, stats: Stats):
    """Apply one randomly selected insertion reaction to ``rdkit_mol``.

    With probability 0.63 a reaction is drawn from
    ``stats.rxn_smarts_ring_list``. If the molecule does not match the SMARTS
    ``[r3,r4,r5]`` or has zero aliphatic rings, the draw is replaced by one
    from ``stats.rxn_smarts_make_ring``, and with probability 0.036 every
    ``!`` is removed from it (labelled "starting a fused ring" by the
    original author).

    Otherwise, a molecule matching ``[*]1=[*]-[*]=[*]-1`` gets the fixed
    reaction ``[r4:1][r4:2]>>[*:1]C[*:2]``; any other molecule gets a draw
    from ``stats.rxn_smarts_list``.

    Arguments:
        rdkit_mol: molecule to modify.
        stats: holder of the reaction SMARTS lists and their sampling
            probabilities.
    Returns:
        The result of ``run_rxn`` if ``valences_not_too_large`` accepts it,
        otherwise a copy of the input taken before the reaction was run.
    """
    # Snapshot taken up front so a rejected product can be discarded.
    fallback = Chem.Mol(rdkit_mol)

    if np.random.random() >= 0.63:
        # Non-ring branch.
        four_ring_diene = Chem.MolFromSmarts('[*]1=[*]-[*]=[*]-1')
        if rdkit_mol.HasSubstructMatch(four_ring_diene):
            reaction = '[r4:1][r4:2]>>[*:1]C[*:2]'
        else:
            reaction = np.random.choice(stats.rxn_smarts_list, p=stats.p)
    else:
        # probability of adding ring atom
        reaction = np.random.choice(stats.rxn_smarts_ring_list,
                                    p=stats.p_ring)
        small_ring = Chem.MolFromSmarts('[r3,r4,r5]')
        can_extend_ring = (
            rdkit_mol.HasSubstructMatch(small_ring)
            and AllChem.CalcNumAliphaticRings(rdkit_mol) != 0
        )
        if not can_extend_ring:
            # NOTE(review): the make-ring draw is weighted by stats.p_ring,
            # the same weights as the ring-atom draw. Confirm that this is
            # intended and that both lists have the same length.
            reaction = np.random.choice(stats.rxn_smarts_make_ring,
                                        p=stats.p_ring)
            # probability of starting a fused ring
            if np.random.random() < 0.036:
                reaction = reaction.replace("!", "")

    product = run_rxn(reaction, rdkit_mol)
    return product if valences_not_too_large(product) else fallback
 def is_proline(mol):
     """Tell whether ``mol`` has an aliphatic ring and matches PROLINE_N_TERM.

     Arguments:
         mol: molecule to inspect (an RDKit molecule, judging by the calls
             made on it).
     Returns:
         bool: False when the aliphatic ring count is zero; otherwise the
             result of the ``PROLINE_N_TERM`` substructure match.
     """
     # Guard clause: without an aliphatic ring the match is never attempted.
     if not AllChem.CalcNumAliphaticRings(mol):
         return False
     return bool(mol.HasSubstructMatch(PROLINE_N_TERM))