Пример #1
0
def CalculateLabuteASA(mol):
    """
    #################################################################
    Compute Labute's Approximate Surface Area (the ASA descriptor from MOE).

    Usage:

        result=CalculateLabuteASA(mol)

        Input: mol is a molecule object

        Output: result is a dict with the single key 'LabuteASA' whose
        value is rounded to 3 decimal places
    #################################################################
    """
    # includeHs=1 is forwarded unchanged to MOE.pyLabuteASA.
    return {'LabuteASA': round(MOE.pyLabuteASA(mol, includeHs=1), 3)}
Пример #2
0
def CalculateLabuteASA(mol):
    """
    #################################################################
    Labute's Approximate Surface Area (ASA from MOE) for one molecule.

    Usage:

        result=CalculateLabuteASA(mol)

        Input: mol is a molecule object

        Output: result is a dict, {'LabuteASA': value rounded to 3 places}
    #################################################################
    """
    labute_asa = MOE.pyLabuteASA(mol, includeHs=1)
    rounded = round(labute_asa, 3)
    return dict(LabuteASA=rounded)
Пример #3
0
    def testTPSAShort(self):
        """Short TPSA test.

        Reads ``NCI/first_200.tpsa.csv`` under ``RDConfig.RDDataDir``. Every
        line that is not blank and does not start with ``#`` is parsed as
        ``SMILES,expected_value`` and compared with ``MolSurf.TPSA`` via
        ``feq``.
        """
        inName = RDConfig.RDDataDir + '/NCI/first_200.tpsa.csv'
        # The context manager closes the handle even if reading fails.
        with open(inName, 'r') as inF:
            lines = inF.readlines()
        for line in lines:
            # str.strip() returns a new string, so the result has to be kept.
            line = line.strip()
            if not line or line[0] == '#':
                continue
            smi, ans = line.split(',')
            ans = float(ans)

            mol = Chem.MolFromSmiles(smi)

            calc = MolSurf.TPSA(mol)
            assert feq(
                calc,
                ans), 'bad TPSA for SMILES %s (%.2f != %.2f)' % (smi, calc,
                                                                 ans)
Пример #4
0
def PhyChem(smiles):
    """Build the 19-column physicochemical descriptor matrix for a set of molecules.

    Each SMILES string is parsed and described by 19 values. If any
    descriptor call raises, the SMILES is printed and its row is filled with
    zeros. The stacked rows are then passed through ``Scaler().fit_transform``.

    Arguments:
        smiles (list): list of SMILES strings.
    Returns:
        props (ndarray): m X 19 matrix of scaled PhysChem descriptors,
            where m is the number of samples.
    """
    rows = []
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        try:
            # The list literal is evaluated left to right, which fixes the
            # column order of the resulting matrix.
            row = [
                desc.MolWt(mol),                       # MW
                Crippen.MolLogP(mol),                  # LOGP
                Lipinski.NumHAcceptors(mol),           # HBA
                Lipinski.NumHDonors(mol),              # HBD
                Lipinski.NumRotatableBonds(mol),       # rotatable bonds
                AllChem.CalcNumAmideBonds(mol),        # amide bonds
                AllChem.CalcNumBridgeheadAtoms(mol),   # bridgehead atoms
                Lipinski.NumHeteroatoms(mol),          # hetero atoms
                Lipinski.HeavyAtomCount(mol),          # heavy atoms
                AllChem.CalcNumSpiroAtoms(mol),        # spiro atoms
                AllChem.CalcFractionCSP3(mol),         # fraction of sp3 carbons
                Lipinski.RingCount(mol),               # rings
                AllChem.CalcNumAliphaticRings(mol),    # aliphatic rings
                AllChem.CalcNumAromaticRings(mol),     # aromatic rings
                AllChem.CalcNumSaturatedRings(mol),    # saturated rings
                AllChem.CalcNumHeterocycles(mol),      # heterocycles
                MolSurf.TPSA(mol),                     # TPSA
                desc.NumValenceElectrons(mol),         # valence electrons
                Crippen.MolMR(mol),                    # molar refractivity
            ]
        except Exception:
            # Best effort: report the offending SMILES and keep a zero row so
            # the output stays aligned with the input list.
            print(smi)
            row = [0] * 19
        rows.append(row)
    matrix = np.array(rows)
    return Scaler().fit_transform(matrix)
Пример #5
0
  def _testTPSALongNCI(self):
    """Long TPSA test.

    Reads ``tpsa_regr.csv`` from ``Chem/test_data`` under
    ``RDConfig.RDCodeDir``. Every line that is not blank and does not start
    with ``#`` is parsed as ``SMILES,expected_value``; the SMILES must parse
    and ``MolSurf.TPSA`` must match the expected value according to ``feq``.
    """
    fileN = 'tpsa_regr.csv'
    with open(os.path.join(RDConfig.RDCodeDir,'Chem','test_data',fileN),'r') as inF:
      lines = inF.readlines()
    # enumerate(..., 1) yields the same 1-based line numbers the failure
    # messages report.
    for lineNo, line in enumerate(lines, 1):
      # str.strip() returns a new string, so the result has to be kept.
      line = line.strip()
      if not line or line[0] == '#':
        continue
      smi,ans = line.split(',')
      ans = float(ans)

      mol = Chem.MolFromSmiles(smi)
      assert mol,"line %d, failed for smiles: %s"%(lineNo,smi)

      calc = MolSurf.TPSA(mol)
      assert feq(calc,ans),'line %d: bad TPSA for SMILES %s (%.2f != %.2f)'%(lineNo,smi,calc,ans)
Пример #6
0
def CalculateTPSA(mol):
    """
    #################################################################
    Topological polar surface area computed from fragment contributions.

    Implementation based on the Daylight contrib program tpsa.

    Usage:

        result=CalculateTPSA(mol)

        Input: mol is a molecule object

        Output: result is a dict, {'TPSA': value rounded to 3 places}
    #################################################################
    """
    return {'TPSA': round(MOE.TPSA(mol), 3)}
Пример #7
0
def describe_atom(atom_object, use_formal_charge=False, use_Gasteiger=False):
    """Return a flat list of numeric features describing one atom.

    The list holds, in order:
        atomic number, hybridization code, total valence,
        max(implicit H count, explicit H count), number of outer electrons,
        electronegativity (0 when the symbol is missing from the
        ``electronegativity`` table), aromaticity flag as a float,
        formal charge (only if ``use_formal_charge``),
        ``_GasteigerHCharge`` + ``_GasteigerCharge`` (only if
        ``use_Gasteiger``), and the atom's entry from
        ``MolSurf._LabuteHelper``.

    Args:
        atom_object: an atom that belongs to a molecule.
        use_formal_charge: append the formal charge when true.
        use_Gasteiger: append the summed Gasteiger charge properties when
            true; these properties must already be set on the atom.

    Returns:
        list: the feature values in the order described above.

    Raises:
        KeyError: if the hybridization name is not in the lookup table. The
            molecule SMILES and atom index are printed before re-raising.
    """
    mol = atom_object.GetOwningMol()
    contribs = MolSurf._LabuteHelper(mol)
    idx = atom_object.GetIdx()
    code = {
        'SP': 1,
        'SP2': 2,
        'SP3': 3,
        'UNSPECIFIED': -1,
        'UNKNOWN': -1,
        'S': 0,
        'SP3D': 4,
        'SP3D2': 5
    }
    result = []
    symbol = atom_object.GetSymbol()
    result.append(atom_object.GetAtomicNum())
    try:
        result.append(code[atom_object.GetHybridization().name])
        result.append(atom_object.GetTotalValence())
    except Exception:
        # Print enough context to locate the offending atom, then propagate.
        print(Chem.MolToSmiles(mol, canonical=0), idx)
        raise
    result.append(
        max(atom_object.GetNumImplicitHs(), atom_object.GetNumExplicitHs()))
    result.append(p_table.GetNOuterElecs(symbol))
    result.append(electronegativity.get(symbol, 0))
    result.append(float(atom_object.GetIsAromatic()))
    if use_formal_charge:
        result.append(atom_object.GetFormalCharge())
    if use_Gasteiger:
        q_in_neu = atom_object.GetDoubleProp(
            '_GasteigerHCharge') + atom_object.GetDoubleProp(
                '_GasteigerCharge')
        result.append(q_in_neu)
    # The helper's result is indexed at idx + 1, i.e. shifted by one relative
    # to atom indices.
    result.append(contribs[idx + 1])
    return result
    def filter_druglikeness_5_rules(self, smiles):
        """Drop SMILES that violate the drug-likeness thresholds, in place.

        A molecule is rejected when any of these holds: MW > 600,
        ALOGP > 6, ALOGP < 0, HBA > 11, HBD > 7, PSA > 180 or ROTB > 11.
        The number of rejected entries is printed.

        Args:
            smiles: mutable list of SMILES strings; it is filtered in place.

        Returns:
            The same ``smiles`` list object, holding only the accepted
            entries in their original order.
        """
        # NOTE(review): a SMILES that fails to parse is not handled here and
        # is passed on to Chem.RemoveHs unchanged -- confirm desired policy.
        kept = []
        count = 0
        # Survivors are collected separately: removing items from ``smiles``
        # while iterating over it would skip the entry after each removal.
        for smi in smiles:
            mol = Chem.MolFromSmiles(smi)
            mol = Chem.RemoveHs(mol)

            MW = rdmd._CalcMolWt(mol)
            ALOGP = Crippen.MolLogP(mol)
            HBA = rdmd.CalcNumHBA(mol)
            HBD = rdmd.CalcNumHBD(mol)
            PSA = MolSurf.TPSA(mol)
            ROTB = rdmd.CalcNumRotatableBonds(
                mol, rdmd.NumRotatableBondsOptions.Strict)

            if MW > 600 or ALOGP > 6 or ALOGP < 0 or HBA > 11 or HBD > 7 or PSA > 180 or ROTB > 11:
                count = count + 1
            else:
                kept.append(smi)
        # Slice assignment keeps the caller's list object, so callers that
        # rely on the in-place effect still see the filtered result.
        smiles[:] = kept
        print("unavaliable rule_5_drug:%i" % count)

        return smiles
Пример #9
0
 def testBug12a(self):
     """Check MolSurf.TPSA against stored values for a fixed set of SMILES."""
     from rdkit.Chem import MolSurf
     expected_by_smiles = (
         ('OC(=O)[CH](CC1=CC=CC=C1)C2=CC=CC=C2', 37.3),
         ('OC(=O)C(C1=CC=CC=C1)(C2=CC=CC=C2)C3=CC=CC=C3', 37.3),
         ('CCC(CC)(CC)[CH](OC(=O)C1=C(C=CC=C1)C(O)=O)C2=CC=CC=C2', 63.6),
         ('C[C](O)([CH](C(O)=O)C1=CC=CC=C1)C2=CC=CC=C2', 57.53),
         ('C[CH]([CH](C(O)=O)C1=CC=CC=C1)C2=CC=CC=C2', 37.3),
         ('OC(=O)CBr', 37.3),
         ('OC(=O)CCl', 37.3),
         ('OC(=O)C=CC(=O)C1=CC=CC=C1', 54.37),
         ('NC1=C(C=CC=C1)C(O)=O', 63.32),
         ('OC(=O)C1=CC=CC=C1', 37.3),
         ('CN(C)C(=N)NC1=NC(=C2C=C(Cl)C=CC2=N1)C.O[N+]([O-])=O', 128.27),
         ('CCN(CC)C(=N)NC1=NC(=C2C=C(Cl)C=CC2=N1)C.O[N+]([O-])=O', 128.27),
         ('ON(O)NC(=N)NN=C1C(=O)NC2=C1C=CC=C2', 133.07),
         ('NC1=CC=C(C=C1)C=NNC(=N)NN(O)O', 129.99),
         ('CC(=O)NC1=CC=C(C=C1)C=NNC(=N)NN(O)O', 133.07),
         ('COC1=CC=C(C=C1)C=NNC(=N)NN(O)O', 113.2),
         ('ON(O)NC(=N)NN=CC1=CC=CC=C1', 103.97),
         ('ON(O)NC(=N)NN=CC=CC1=CC=CC=C1', 103.97),
         ('ON(O)NC(=N)NN=CC1=C(Cl)C=C(Cl)C=C1', 103.97),
         ('CC(C)=CCCC(C)=CC=NNC(=N)NN(O)O', 103.97),
         ('CN(C)C1=CC=C(C=C1)C=NNC(=N)NN(O)O', 107.21),
         ('ON(O)NC(=N)NN=CC1=CC=CO1', 117.11),
         ('ON(O)NC(=N)NN=CC1=CC=C(O)C=C1', 124.2),
         ('CC(C)C1=CC=C(C=C1)C=NNC(=N)NN(O)O', 103.97),
         ('COC1=C(C=CC=C1)C=NNC(=N)NN(O)O', 113.2),
         ('ON(O)NC(=N)NN=CC1=C(C=CC=C1)[N+]([O-])=O', 147.11),
         ('ON(O)NC(=N)NN=CC1=CC=C(C=C1)[N+]([O-])=O', 147.11),
         ('ON(O)NC(=N)NN=CC1=C(O)C=CC(=C1)[N+]([O-])=O', 167.34),
         ('ON(O)NC(=N)NN=CC1=CC=NC=C1', 116.86),
         ('ON(O)NC(=N)NN=CC1=CC=CC=N1', 116.86),
         ('ON(O)NC(=N)NN=CC1=CC=CN=C1', 116.86),
     )
     for smiles, expected in expected_by_smiles:
         computed = MolSurf.TPSA(Chem.MolFromSmiles(smiles))
         assert feq(computed, expected), (
             'bad TPSA (%f != %f) for smiles: %s' % (computed, expected, smiles))
Пример #10
0
def properties(mol):
    """
    Calculates the properties that are required to calculate the QED descriptor.

    Returns an 8-element list: MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS.
    Raises TypeError when ``mol`` is None.
    """
    if mol is None:
        raise TypeError('You need to provide a mol argument.')

    mw = rdmd._CalcMolWt(mol)
    alogp = Crippen.MolLogP(mol)

    # HBA: total number of matches over all acceptor patterns.
    hba = 0
    for pattern in Acceptors:
        if mol.HasSubstructMatch(pattern):
            hba += len(mol.GetSubstructMatches(pattern))

    hbd = Lipinski.NumHDonors(mol)
    psa = MolSurf.TPSA(mol)
    rotb = Lipinski.NumRotatableBonds(mol)

    # AROM: GetSSSR applied to a copy of the molecule with the
    # AliphaticRings substructures deleted.
    stripped = Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings)
    arom = Chem.GetSSSR(stripped)

    # ALERTS: number of structural-alert patterns present in the molecule.
    alerts = 0
    for alert in StructuralAlerts:
        if mol.HasSubstructMatch(alert):
            alerts += 1

    return [mw, alogp, hba, hbd, psa, rotb, arom, alerts]
Пример #11
0
def makeFeatures(fileName):
    """Write one feature record per molecule of ``FKBP12_binders.sdf``.

    For every molecule the record consists of five lines (molecular volume,
    Labute ASA, atom count, bond count, heavy-atom count), a blank line, a
    ``KI: <value>`` line taken from the ``Ki (nM)`` property, and a blank
    separator line.

    Args:
        fileName: path of the output file; it is opened for writing.

    Side effects:
        Rebinds the module-level ``featuresFile`` to the output handle,
        which is closed before the function returns or raises.
    """
    from rdkit import Chem
    from rdkit.Chem import AllChem
    from rdkit.Chem import MolSurf

    global featuresFile, numFeatures
    featuresFile = open(fileName, 'w')      # Molecule features output file

    # try/finally closes the handle even when a molecule fails mid-run.
    try:
        drugDB = Chem.SDMolSupplier("FKBP12_binders.sdf")

        if debug:
            # The single-argument call form works under Python 2 and 3.
            print("\n\tNo features data file found. Writing new features data file.\n")

        # Select features of interest
        for mol in drugDB:
            values = [
                AllChem.ComputeMolVolume(mol),
                MolSurf.pyLabuteASA(mol),
                mol.GetNumAtoms(),
                mol.GetNumBonds(),
                mol.GetNumHeavyAtoms(),
            ]
            record = "".join("{}\n".format(value) for value in values)
            record += "\nKI: {}\n".format(mol.GetProp("Ki (nM)"))
            record += "\n"        # Use a blank line to divide molecule data

            featuresFile.write(record)
    finally:
        featuresFile.close()
Пример #12
0
 def test_pySMR_VSA_(self):
   """MolSurf.SMR_VSA_ and MolSurf.pySMR_VSA_ must agree on the NCI-200 set."""
   for record in TestCase.readNCI_200():
     reparsed = Chem.MolFromSmiles(record.smiles)
     reference = MolSurf.SMR_VSA_(record.mol)
     candidate = MolSurf.pySMR_VSA_(reparsed, force=False)
     # Compare the two result sequences element by element.
     for refValue, pyValue in zip(reference, candidate):
       self.assertAlmostEqual(refValue, pyValue)
Пример #13
0
 def test_pyLabuteASA(self):
   """MolSurf.LabuteASA and MolSurf.pyLabuteASA must agree on the NCI-200 set."""
   for record in TestCase.readNCI_200():
     reparsed = Chem.MolFromSmiles(record.smiles)
     reference = MolSurf.LabuteASA(record.mol)
     candidate = MolSurf.pyLabuteASA(reparsed)
     self.assertAlmostEqual(reference, candidate)
Пример #14
0
 def test_pyLabuteHelper(self):
     """MolSurf._LabuteHelper and MolSurf._pyLabuteHelper must agree on the NCI-200 set."""
     for record in TestCase.readNCI_200():
         reparsed = Chem.MolFromSmiles(record.smiles)
         reference = MolSurf._LabuteHelper(record.mol)
         candidate = MolSurf._pyLabuteHelper(reparsed)
         # Compare the two result sequences element by element.
         for refValue, pyValue in zip(reference, candidate):
             self.assertAlmostEqual(refValue, pyValue)
Пример #15
0
 def test_pySMR_VSA_(self):
     """MolSurf.SMR_VSA_ and MolSurf.pySMR_VSA_ must agree on the NCI-200 set."""
     for entry in TestCase.readNCI_200():
         pyMol = Chem.MolFromSmiles(entry.smiles)
         expectedBins = MolSurf.SMR_VSA_(entry.mol)
         actualBins = MolSurf.pySMR_VSA_(pyMol, force=False)
         for expected, actual in zip(expectedBins, actualBins):
             self.assertAlmostEqual(expected, actual)
Пример #16
0
 def test_pyLabuteASA(self):
     """MolSurf.LabuteASA and MolSurf.pyLabuteASA must agree on the NCI-200 set."""
     for entry in TestCase.readNCI_200():
         pyMol = Chem.MolFromSmiles(entry.smiles)
         expected = MolSurf.LabuteASA(entry.mol)
         actual = MolSurf.pyLabuteASA(pyMol)
         self.assertAlmostEqual(expected, actual)
Пример #17
0
def CalculateTPSA(mol):
    """Return ``MS.TPSA(mol)`` rounded to 3 decimal places."""
    tpsa = MS.TPSA(mol)
    return round(tpsa, 3)
Пример #18
0
 def _testLabuteASALong2(self):
  """ Regression test of Labute's ASA value (hydrogens included)

  Delegates to the private __testDesc helper with column 6 of
  'PP_descrs_regress.2.csv'.
  """
  def labuteWithHs(x):
   return MolSurf.LabuteASA(x,includeHs=1)
  self.__testDesc('PP_descrs_regress.2.csv',6,labuteWithHs)
Пример #19
0
 def calculate(self):
     """Return MolSurf.LabuteASA evaluated on this object's ``mol``."""
     labute_asa = MolSurf.LabuteASA(self.mol)
     return labute_asa
Пример #20
0
def makeFeatures(fileName):
    """Write feature records for selected molecules of ``FKBP12_binders.sdf``.

    The i-th molecule of the SD file is paired with the Gaussian log
    ``gaussian_files/drug_<i>.log``. A molecule is selected when the parsed
    log reports ``converged == "True"`` and its
    ``BindingDB Target Chain Sequence`` property equals the hard-coded
    target sequence. Among selected molecules, only the first occurrence of
    each ``BindingDB Ligand Name`` is written.

    Each written record holds one value per line: volume, Labute ASA, atom /
    bond / heavy-atom counts, the five values parsed from the Gaussian log,
    and twenty fragment counts. A blank line, a ``KI: <value>`` line and a
    blank separator line follow.

    Args:
        fileName: path of the output file; it is opened for writing.

    Side effects:
        Rebinds the module-level ``featuresFile`` to the output handle,
        which is closed before returning or raising, and prints two summary
        lines.
    """
    from rdkit import Chem
    from rdkit.Chem import Fragments
    from rdkit.Chem import AllChem
    from rdkit.Chem import MolSurf

    global featuresFile, numFeatures
    featuresFile = open(fileName, 'w')      # Molecule features output file

    # Only ligands whose target chain equals this sequence are kept.
    target_sequence = "MGVQVETISPGDGRTFPKRGQTCVVHYTGMLEDGKKFDSSRDRNKPFKFMLGKQEVIRGWEEGVAQMSVGQRAKLTISPDYAYGATGHPGIIPPHATLVFDVELLKLE"

    # Fragment counters, in the order they are written to the record.
    fragment_counters = (
        "fr_Al_OH",       # aliphatic alcohols
        "fr_Ar_OH",       # aromatic alcohols
        "fr_ketone",      # number of ketones
        "fr_ether",       # number of ether oxygens
        "fr_ester",       # number of esters
        "fr_aldehyde",    # number of aldehydes
        "fr_COO",         # number of carboxylic acids
        "fr_benzene",     # number of benzenes
        "fr_Ar_N",        # number of aromatic nitrogens
        "fr_NH0",         # number of tertiary amines
        "fr_NH1",         # number of secondary amines
        "fr_NH2",         # number of primary amines
        "fr_amide",       # number of amides
        "fr_SH",          # number of thiol groups
        "fr_nitro",       # number of nitro groups
        "fr_furan",       # number of furan rings
        "fr_imidazole",   # number of imidazole rings
        "fr_oxazole",     # number of oxazole rings
        "fr_morpholine",  # number of morpholine rings
        "fr_halogen",     # number of halogens
    )

    molCount = 0
    convergedCount = 0
    converged_and_different = 0
    seen_names = set()      # ligand names already written (O(1) duplicate check)

    # try/finally closes the handle even when a molecule fails mid-run.
    try:
        # open database file
        drugDB = Chem.SDMolSupplier("FKBP12_binders.sdf")

        if debug:
            # The single-argument call form works under Python 2 and 3.
            print("\n\tNo features data file found. Writing new features data file.\n")

        # load fragment descriptor
        Fragments._LoadPatterns(fileName='/usr/local/anaconda/pkgs/rdkit-2015.03.1-np19py27_0/share/RDKit/Data/FragmentDescriptors.csv')

        # Select features of interest
        for mol in drugDB:
            gaussian_log_file = "gaussian_files/drug_" + str(molCount) + ".log"
            converged, dipole, quadrupole, octapole, hexadecapole, dg_solv = gaussian.parseGaussianLog(gaussian_log_file)
            # Advance the index now so every early ``continue`` below keeps
            # the SD-file position and the log-file number in step.
            molCount += 1

            if converged != "True" or mol.GetProp("BindingDB Target Chain Sequence") != target_sequence:
                continue
            convergedCount += 1

            ligand_name = mol.GetProp("BindingDB Ligand Name")
            if ligand_name in seen_names:
                continue
            seen_names.add(ligand_name)

            values = [
                AllChem.ComputeMolVolume(mol),
                MolSurf.pyLabuteASA(mol),
                mol.GetNumAtoms(),
                mol.GetNumBonds(),
                mol.GetNumHeavyAtoms(),
                dipole,
                quadrupole,
                octapole,
                hexadecapole,
                dg_solv,
            ]
            values.extend(getattr(Fragments, counter)(mol)
                          for counter in fragment_counters)

            record = "".join("{}\n".format(value) for value in values)
            record += "\nKI: {}\n".format(mol.GetProp("Ki (nM)"))
            record += "\n"        # Use a blank line to divide molecule data

            featuresFile.write(record)
            converged_and_different += 1
    finally:
        featuresFile.close()

    print("Number of molecules with converged gaussian log files and correct sequence: {} \n".format(convergedCount))
    print("Number of overlap drugs: {}".format(convergedCount - converged_and_different))
Пример #21
0
 def test_pyLabuteHelper(self):
   """MolSurf._LabuteHelper and MolSurf._pyLabuteHelper must agree on the NCI-200 set."""
   for entry in TestCase.readNCI_200():
     pyMol = Chem.MolFromSmiles(entry.smiles)
     expectedValues = MolSurf._LabuteHelper(entry.mol)
     actualValues = MolSurf._pyLabuteHelper(pyMol)
     for expected, actual in zip(expectedValues, actualValues):
       self.assertAlmostEqual(expected, actual)
Пример #22
0
def CalculateTPSA(mol: Chem.Mol) -> float:
    """Return the topological polar surface area, rounded to 3 decimals.

    Reference: Ertl P. et al., J.Med.Chem. (2000), 43,3714-3717.
    """
    tpsa = MS.TPSA(mol)
    return round(tpsa, 3)
def CalculatePEOEVSA(mol, bins=None):
    """Return the values of ``MOE.PEOE_VSA_`` as a dict.

    Keys are 'PEOEVSA0', 'PEOEVSA1', ... in the order the values are
    returned; each value is rounded to 3 decimal places. ``bins`` is
    forwarded unchanged and the calculation is always forced (``force=1``).
    """
    values = MOE.PEOE_VSA_(mol, bins, force=1)
    return {
        'PEOEVSA' + str(index): round(value, 3)
        for index, value in enumerate(values)
    }
Пример #24
0
def calc_rdkit(mol):
    """Evaluate a fixed battery of RDKit descriptors on ``mol``.

    The descriptor functions are applied in the order listed below and the
    results are returned as a ``pandas.Series`` built from a NumPy array, so
    the position in the series identifies the descriptor.
    """
    calculators = (
        Crippen.MolLogP,
        Crippen.MolMR,
        Descriptors.FpDensityMorgan1,
        Descriptors.FpDensityMorgan2,
        Descriptors.FpDensityMorgan3,
        Descriptors.FractionCSP3,
        Descriptors.HeavyAtomMolWt,
        Descriptors.MaxAbsPartialCharge,
        Descriptors.MaxPartialCharge,
        Descriptors.MinAbsPartialCharge,
        Descriptors.MinPartialCharge,
        Descriptors.MolWt,
        Descriptors.NumRadicalElectrons,
        Descriptors.NumValenceElectrons,
        EState.EState.MaxAbsEStateIndex,
        EState.EState.MaxEStateIndex,
        EState.EState.MinAbsEStateIndex,
        EState.EState.MinEStateIndex,
        EState.EState_VSA.EState_VSA1,
        EState.EState_VSA.EState_VSA10,
        EState.EState_VSA.EState_VSA11,
        EState.EState_VSA.EState_VSA2,
        EState.EState_VSA.EState_VSA3,
        EState.EState_VSA.EState_VSA4,
        EState.EState_VSA.EState_VSA5,
        EState.EState_VSA.EState_VSA6,
        EState.EState_VSA.EState_VSA7,
        EState.EState_VSA.EState_VSA8,
        EState.EState_VSA.EState_VSA9,
        Fragments.fr_Al_COO,
        Fragments.fr_Al_OH,
        Fragments.fr_Al_OH_noTert,
        Fragments.fr_aldehyde,
        Fragments.fr_alkyl_carbamate,
        Fragments.fr_alkyl_halide,
        Fragments.fr_allylic_oxid,
        Fragments.fr_amide,
        Fragments.fr_amidine,
        Fragments.fr_aniline,
        Fragments.fr_Ar_COO,
        Fragments.fr_Ar_N,
        Fragments.fr_Ar_NH,
        Fragments.fr_Ar_OH,
        Fragments.fr_ArN,
        Fragments.fr_aryl_methyl,
        Fragments.fr_azide,
        Fragments.fr_azo,
        Fragments.fr_barbitur,
        Fragments.fr_benzene,
        Fragments.fr_benzodiazepine,
        Fragments.fr_bicyclic,
        Fragments.fr_C_O,
        Fragments.fr_C_O_noCOO,
        Fragments.fr_C_S,
        Fragments.fr_COO,
        Fragments.fr_COO2,
        Fragments.fr_diazo,
        Fragments.fr_dihydropyridine,
        Fragments.fr_epoxide,
        Fragments.fr_ester,
        Fragments.fr_ether,
        Fragments.fr_furan,
        Fragments.fr_guanido,
        Fragments.fr_halogen,
        Fragments.fr_hdrzine,
        Fragments.fr_hdrzone,
        Fragments.fr_HOCCN,
        Fragments.fr_imidazole,
        Fragments.fr_imide,
        Fragments.fr_Imine,
        Fragments.fr_isocyan,
        Fragments.fr_isothiocyan,
        Fragments.fr_ketone,
        Fragments.fr_ketone_Topliss,
        Fragments.fr_lactam,
        Fragments.fr_lactone,
        Fragments.fr_methoxy,
        Fragments.fr_morpholine,
        Fragments.fr_N_O,
        Fragments.fr_Ndealkylation1,
        Fragments.fr_Ndealkylation2,
        Fragments.fr_NH0,
        Fragments.fr_NH1,
        Fragments.fr_NH2,
        Fragments.fr_Nhpyrrole,
        Fragments.fr_nitrile,
        Fragments.fr_nitro,
        Fragments.fr_nitro_arom,
        Fragments.fr_nitro_arom_nonortho,
        Fragments.fr_nitroso,
        Fragments.fr_oxazole,
        Fragments.fr_oxime,
        Fragments.fr_para_hydroxylation,
        Fragments.fr_phenol,
        Fragments.fr_phenol_noOrthoHbond,
        Fragments.fr_phos_acid,
        Fragments.fr_phos_ester,
        Fragments.fr_piperdine,
        Fragments.fr_piperzine,
        Fragments.fr_priamide,
        Fragments.fr_prisulfonamd,
        Fragments.fr_pyridine,
        Fragments.fr_quatN,
        Fragments.fr_SH,
        Fragments.fr_sulfide,
        Fragments.fr_sulfonamd,
        Fragments.fr_sulfone,
        Fragments.fr_term_acetylene,
        Fragments.fr_tetrazole,
        Fragments.fr_thiazole,
        Fragments.fr_thiocyan,
        Fragments.fr_thiophene,
        Fragments.fr_unbrch_alkane,
        Fragments.fr_urea,
        GraphDescriptors.BalabanJ,
        GraphDescriptors.BertzCT,
        GraphDescriptors.Chi0,
        GraphDescriptors.Chi0n,
        GraphDescriptors.Chi0v,
        GraphDescriptors.Chi1,
        GraphDescriptors.Chi1n,
        GraphDescriptors.Chi1v,
        GraphDescriptors.Chi2n,
        GraphDescriptors.Chi2v,
        GraphDescriptors.Chi3n,
        GraphDescriptors.Chi3v,
        GraphDescriptors.Chi4n,
        GraphDescriptors.Chi4v,
        GraphDescriptors.HallKierAlpha,
        GraphDescriptors.Ipc,
        GraphDescriptors.Kappa1,
        GraphDescriptors.Kappa2,
        GraphDescriptors.Kappa3,
        Lipinski.HeavyAtomCount,
        Lipinski.NHOHCount,
        Lipinski.NOCount,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumAliphaticRings,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHAcceptors,
        Lipinski.NumHDonors,
        Lipinski.NumHeteroatoms,
        Lipinski.NumRotatableBonds,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.RingCount,
        MolSurf.LabuteASA,
        MolSurf.PEOE_VSA1,
        MolSurf.PEOE_VSA10,
        MolSurf.PEOE_VSA11,
        MolSurf.PEOE_VSA12,
        MolSurf.PEOE_VSA13,
        MolSurf.PEOE_VSA14,
        MolSurf.PEOE_VSA2,
        MolSurf.PEOE_VSA3,
        MolSurf.PEOE_VSA4,
        MolSurf.PEOE_VSA5,
        MolSurf.PEOE_VSA6,
        MolSurf.PEOE_VSA7,
        MolSurf.PEOE_VSA8,
        MolSurf.PEOE_VSA9,
        MolSurf.SlogP_VSA1,
        MolSurf.SlogP_VSA10,
        MolSurf.SlogP_VSA11,
        MolSurf.SlogP_VSA12,
        MolSurf.SlogP_VSA2,
        MolSurf.SlogP_VSA3,
        MolSurf.SlogP_VSA4,
        MolSurf.SlogP_VSA5,
        MolSurf.SlogP_VSA6,
        MolSurf.SlogP_VSA7,
        MolSurf.SlogP_VSA8,
        MolSurf.SlogP_VSA9,
        MolSurf.SMR_VSA1,
        MolSurf.SMR_VSA10,
        MolSurf.SMR_VSA2,
        MolSurf.SMR_VSA3,
        MolSurf.SMR_VSA4,
        MolSurf.SMR_VSA5,
        MolSurf.SMR_VSA6,
        MolSurf.SMR_VSA7,
        MolSurf.SMR_VSA8,
        MolSurf.SMR_VSA9,
        MolSurf.TPSA,
    )
    values = [calculate(mol) for calculate in calculators]
    return pd.Series(np.array(values))
def _write_molecule_features(in_path, out_path, include_target):
    """Copy one molecule CSV to ``out_path`` with extra RDKit feature columns.

    The first line of ``in_path`` is skipped as a header. For every other
    non-blank line, the first eight fields are kept, the SMILES is read from
    field 10, and the phosphorus fragment count, aromatic carbocycle count,
    MolLogP, PEOE_VSA1 and the Morgan (radius 2) bit string are appended,
    followed by the SMILES and -- when ``include_target`` -- field 11.
    """
    header = [
        'index', 'Maximum Degree', 'Minimum Degree', 'Molecular Weight',
        'Number of H-Bond Donors', 'Number of Rings',
        'Number of Rotatable Bonds', 'Polar Surface Area', 'fr_phos',
        'aromatic_carbocycles', 'MolLogP', 'PEOE_VSA1', 'Fingerprint',
        'smiles'
    ]
    if include_target:
        header.append('target')

    # Context managers close both files even if a row fails to process.
    with open(in_path, 'r') as infile:
        infile.readline()  # skip the input header row
        with open(out_path, 'w') as f:
            writer = csv.writer(f)
            writer.writerow(header)
            for line in infile:
                line = line.strip('\n\r ')
                if not line:
                    # A blank line has no field 10; skip it instead of
                    # failing with IndexError.
                    continue
                line = line.split(",")
                smiles = line[10].strip()
                mol = Chem.MolFromSmiles(smiles)
                fingerprint_bit_string = GetMorganFingerprintAsBitVect(
                    mol, 2).ToBitString()
                fr_phos = Fragments.fr_phos_acid(
                    mol) + Fragments.fr_phos_ester(mol)
                aromatic_cc = Lipinski.NumAromaticCarbocycles(mol)
                molLogP = Crippen.MolLogP(mol)
                peoe_vsa1 = MolSurf.PEOE_VSA1(mol)
                row = line[:8] + [
                    fr_phos, aromatic_cc, molLogP, peoe_vsa1,
                    fingerprint_bit_string, line[10]
                ]
                if include_target:
                    row.append(line[11])
                writer.writerow(row)


def main():
    """Generate the augmented training and test feature CSV files.

    ``molecule_training.csv`` becomes ``train_molecule_new_features.csv``
    (with the target column) and ``molecule_TestFeatures.csv`` becomes
    ``test_molecule_new_features.csv`` (without it).
    """
    _write_molecule_features("molecule_training.csv",
                             'train_molecule_new_features.csv',
                             include_target=True)
    _write_molecule_features("molecule_TestFeatures.csv",
                             'test_molecule_new_features.csv',
                             include_target=False)
def CalculateTPSA(mol):
    """Return ``{'TPSA': value}`` with ``MOE.TPSA(mol)`` rounded to 3 decimals."""
    return {'TPSA': round(MOE.TPSA(mol), 3)}
def CalculateSLOGPVSA(mol, bins=None):
    """Return the values of ``MOE.SlogP_VSA_`` as a dict.

    Keys are 'slogPVSA0', 'slogPVSA1', ... in the order the values are
    returned; each value is rounded to 3 decimal places. ``bins`` is
    forwarded unchanged and the calculation is always forced (``force=1``).
    """
    values = MOE.SlogP_VSA_(mol, bins, force=1)
    return {
        'slogPVSA' + str(index): round(value, 3)
        for index, value in enumerate(values)
    }
Пример #28
0
def CalculateLabuteASA(mol: Chem.Mol) -> dict:
    """Return Labute's Approximate Surface Area (ASA from MOE) as a dict.

    The result has the single key 'LabuteASA'; the value is rounded to
    3 decimal places.
    """
    return {'LabuteASA': round(MOE.pyLabuteASA(mol, includeHs=1), 3)}
def CalculateLabuteASA(mol):
    """Return ``{'LabuteASA': value}`` from ``MOE.pyLabuteASA`` (includeHs=1), rounded to 3 decimals."""
    labute_asa = MOE.pyLabuteASA(mol, includeHs=1)
    return dict(LabuteASA=round(labute_asa, 3))