Пример #1
0
def get_mordred_descriptors(smiles_list):
    """Compute a fixed set of Mordred descriptors for several SMILES strings.

    Every molecule is parsed with RDKit and gets explicit hydrogens added
    before the descriptors are calculated.

    :param smiles_list: iterable of SMILES strings.
    :return: ``pandas.DataFrame`` with one row per SMILES (in input order) and
        one column per registered descriptor; columns whose dtype is
        ``object`` are dropped.
    :raises ValueError: if a SMILES string cannot be parsed by RDKit.
    """
    calc = mordred.Calculator()

    # The trailing numbers are the original author's notes (presumably the
    # number of descriptors each module contributes).
    descriptor_modules = (
        mordred.AtomCount,  # 16
        mordred.RingCount,  # 139
        mordred.BondCount,  # 9
        mordred.HydrogenBond,  # 2
        mordred.CarbonTypes,  # 10
        mordred.SLogP,  # 2
        mordred.Constitutional,  # 16
        mordred.TopoPSA,  # 2
        mordred.Weight,  # 2
        mordred.Polarizability,  # 2
        mordred.McGowanVolume,  # 1
    )
    for module in descriptor_modules:
        calc.register(module)

    name_list = [str(desc) for desc in calc.descriptors]

    descriptors_list = []
    for smiles in smiles_list:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # Fail with a readable message instead of an opaque error
            # raised from inside Chem.AddHs(None).
            raise ValueError("Could not parse SMILES: {!r}".format(smiles))
        mol = Chem.AddHs(mol)
        # Use the public values() accessor rather than the private _values.
        descriptors_list.append(list(calc(mol).values()))

    descriptors_df = pd.DataFrame(descriptors_list, columns=name_list)
    return descriptors_df.select_dtypes(exclude=['object'])
Пример #2
0
def hdonor_count(mol):
    """Return the first value of mordred's ``HBondDonor`` descriptor for *mol*.

    :param mol: molecule passed straight to the mordred calculator.
    :return: the descriptor value, or ``None`` if the calculation raises.
    """
    try:
        calculator = mordred.Calculator(mordred.HydrogenBond.HBondDonor)
        return list(calculator(mol).values())[0]
    except Exception:
        # Best-effort helper: any ordinary failure maps to None, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return None
Пример #3
0
def _mordred_2d_single(calculator, mol):
    """Evaluate every descriptor of *calculator* on one molecule."""
    try:
        # Convert non-RDKit molecules once (via a MOL block) instead of once
        # per descriptor.
        if isinstance(mol, Chem.Mol):
            rd_mol = mol
        else:
            rd_mol = Chem.MolFromMolBlock(mol.write('mol'))
    except (ZeroDivisionError, ValueError):
        # Same outcome as before: when conversion fails with one of these
        # errors, no descriptor can be computed and the result stays empty.
        return {}

    result = {}
    for descriptor in calculator.descriptors:
        try:
            result[str(descriptor)] = descriptor(rd_mol)
        except (ZeroDivisionError, ValueError):
            # Descriptors that fail this way are simply left out.
            pass
    return result


def Mordred_2D(
    mol: Union[Union[pybel.Molecule, Chem.Mol], List[Union[pybel.Molecule,
                                                           Chem.Mol]]]
) -> Union[dict, List[dict]]:
    """Calculate 2D molecular descriptors with mordred.

    :param mol: either one or multiple molecules the descriptors will be
        calculated from. Objects that are not ``Chem.Mol`` are converted
        through their ``write('mol')`` MOL block.
    :return: a dict mapping descriptor name to value for a single molecule,
        or a list of such dicts (same order as the input) for a list.
        Descriptors raising ``ZeroDivisionError`` or ``ValueError`` are
        omitted from the dict.
    """
    mordred_calculator = mordred.Calculator(descriptors, ignore_3D=True)
    if isinstance(mol, list):
        return [_mordred_2d_single(mordred_calculator, item) for item in mol]
    return _mordred_2d_single(mordred_calculator, mol)
Пример #4
0
def acid_count(mol):
    """Return the first value of mordred's ``AcidicGroupCount`` for *mol*.

    :param mol: molecule passed straight to the mordred calculator.
    :return: the descriptor value, or ``None`` if the calculation raises.
    """
    try:
        calculator = mordred.Calculator(mordred.AcidBase.AcidicGroupCount)
        return list(calculator(mol).values())[0]
    except Exception:
        # Best-effort helper: any ordinary failure maps to None, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return None
Пример #5
0
def rotate_bond_count(mol):
    """Return the first value of mordred's ``RotatableBondsCount`` for *mol*.

    :param mol: molecule passed straight to the mordred calculator.
    :return: the descriptor value, or ``None`` if the calculation raises.
    """
    try:
        calculator = mordred.Calculator(
            mordred.RotatableBond.RotatableBondsCount)
        return list(calculator(mol).values())[0]
    except Exception:
        # Best-effort helper: any ordinary failure maps to None, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return None
Пример #6
0
    def find_model(self):
        """Ask for a model file, load it and display its metadata.

        Opens a file dialog, loads the chosen model into ``self.model``,
        writes the model summary into the ``self.modelname`` widget, rebuilds
        ``self.calc`` from the model's descriptors and refreshes the
        prediction shown in ``self.mp`` for the current SMILES entry.
        Does nothing if the dialog is cancelled.
        """
        modelFile = filedialog.askopenfile()
        if modelFile is None:
            # Dialog was cancelled; keep the currently loaded model.
            return
        # Only the path is needed, so release the handle right away.
        modelPath = modelFile.name
        modelFile.close()

        self.modelname['text'] = (
            "Model Name: " + modelPath[:-7].split('/')[-1] + "\n")
        self.model = model('load')
        self.model.load_model(modelPath)

        training = self.model.information['Training']
        testing = self.model.information['Testing']
        summary = [
            "Model Type: " + str(training['Model Type']) + "\n",
            "Model Parameters: " + str(training['Model Parameters']) + "\n",
            "PCA: " + str(training['PCA']) + "\n",
            "Features: " + str(training['Features']) + "\n",
            "Training Samples: " + str(training['Samples']) + "\n",
            "Training RMSE: " + str(training['RMSE']) + " ºC\n",
            "Test Samples: " + str(testing['Samples']) + "\n",
            "Test RMSE: " + str(testing['RMSE']) + " ºC",
        ]
        self.modelname['text'] += "".join(summary)

        # Register exactly the descriptors the loaded model was trained on.
        self.calc = m.Calculator()
        for desc in self.model.getDescriptors():
            self.calc.register(alldesc[desc])

        mol = Chem.MolFromSmiles(self.SMILES.get())
        if mol is None:
            self.mp['text'] = "Please enter valid SMILES string"
        else:
            self.mp['text'] = self.predictMP(mol)
def _strip_mordred_prefix(name):
    """Remove a leading ``"mordred."`` from *name*, if present."""
    prefix = "mordred."
    return name[len(prefix):] if name.startswith(prefix) else name


def generateDescriptors(dataset, filename, descs=None, big=None):
    """
    Generates descriptor .csv file for given dataset
    Note: Run inside of if __name__ == "__main__":
    Parameters
    ----------
    dataset : string
        Filepath of .csv dataset to generate descriptors for.
        Must contain the columns 'SMILES' and
        'Melting Point {measured, converted}'.
    filename : string
        Filepath prefix of the new descriptor dataset; a suffix listing the
        descriptors used and " .csv" is appended to it.
    descs : iterable
        Mordred descriptors to register (each item is passed to
        ``Calculator.register`` and must have a ``__name__``).
        Defaults to using all Mordred descriptors if not specified
    big : integer
        The size of each batch size
        Only calculates in batches if big is specified
    """
    data = pd.read_csv(dataset)
    mols = [Chem.MolFromSmiles(smi) for smi in data['SMILES']]

    calc = m.Calculator()
    if descs:
        for mod in descs:
            calc.register(mod)
    else:
        calc.register(descriptors)

    if big:
        # Compute batch by batch and combine in memory. The former temp-file
        # round trip re-read the whole frame on every iteration anyway,
        # leaked a "<tmp>.csv" file, added stray "Unnamed: N" columns and
        # relied on DataFrame.append, which no longer exists in pandas 2.
        batches = [
            calc.pandas(mols[start:start + big])
            for start in range(0, len(mols), big)
        ]
        df = (pd.concat(batches, ignore_index=True)
              if batches else pd.DataFrame())
        df = df.dropna(axis=1)
    else:
        df = calc.pandas(mols)

    # Keep numeric (including boolean) columns only.
    df = df.select_dtypes(include=['number', 'bool'])
    # Assign the target while both frames still share the positional index.
    df['MP'] = data['Melting Point {measured, converted}']
    df.index = data['SMILES']

    if descs:
        # str.strip("mordred.") strips a *character set* from both ends and
        # mangles names such as "RotatableBond"; remove the prefix instead.
        names = [_strip_mordred_prefix(d.__name__) for d in descs]
        nameString = filename + " descriptors = " + str(names) + " .csv"
    else:
        nameString = filename + " descriptors = All .csv"
    df.to_csv(nameString)
def get_fingerprints(smiles, type='Morgan'):
    """Build one fingerprint per SMILES string.

    :param smiles: iterable of SMILES strings.
    :param type: ``'Morgan'`` for RDKit Morgan bit vectors (radius 2,
        1024 bits) or ``'Mordred'`` for mordred calculator results.
    :return: ``numpy`` array of fingerprints for the two known types; an
        empty list for any other *type*.
    """
    # NOTE(review): no descriptors are registered on this calculator inside
    # this function - confirm that is intended for the 'Mordred' branch.
    calculator = mordred.Calculator()

    if type == 'Morgan':
        return np.array([
            AllChem.GetMorganFingerprintAsBitVect(
                Chem.MolFromSmiles(smi), 2, 1024)
            for smi in smiles
        ])

    if type == 'Mordred':
        return np.array(
            [calculator(Chem.MolFromSmiles(smi)) for smi in smiles])

    return []
Пример #9
0
def molecular_weight(mol):
    """Return the first value of mordred's ``Weight`` descriptor for *mol*.

    :param mol: molecule passed straight to the mordred calculator.
    :return: the descriptor value, or ``None`` if the calculation raises.
    """
    try:
        calculator = mordred.Calculator(mordred.Weight.Weight)
        return list(calculator(mol).values())[0]
    except Exception:
        # Best-effort helper: any ordinary failure maps to None, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return None
Пример #10
0
def logps(mol):
    """Return the first value of mordred's ``SLogP`` descriptor for *mol*.

    :param mol: molecule passed straight to the mordred calculator.
    :return: the descriptor value, or ``None`` if the calculation raises.
    """
    try:
        calculator = mordred.Calculator(mordred.SLogP.SLogP)
        return list(calculator(mol).values())[0]
    except Exception:
        # Best-effort helper: any ordinary failure maps to None, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return None
def predefined_mordred(mol, desc_type="best", desc_names=False):
    """Calculate a predefined group of mordred descriptors for one molecule.

    :param mol: molecule handed to the mordred calculator (unused when
        *desc_names* is true).
    :param desc_type: which group to register: ``"best"``, ``"atom"``,
        ``"bond"``, ``"topological"``, ``"index"``, ``"ring"``, ``"estate"``,
        or ``"all"`` for every group except ``"best"``. Any other value
        registers nothing.
    :param desc_names: if true, return only the descriptor names.
    :return: list of descriptor names when *desc_names* is true, otherwise
        the list of calculated values in registration order.
    """
    calc1 = mordred.Calculator()

    def register_all(descs):
        # Register one at a time so the resulting order matches the listing.
        for desc in descs:
            calc1.register(desc)

    if desc_type == "best":
        register_all((
            mordred.SLogP,
            mordred.HydrogenBond.HBondAcceptor,
            mordred.HydrogenBond.HBondDonor,
            mordred.AtomCount.AtomCount("HeavyAtom"),
            mordred.TopoPSA.TopoPSA(True),
            mordred.RingCount.RingCount(None, False, False, None, None),
            mordred.BondCount.BondCount("any", False),
        ))

    if desc_type in ("all", "atom"):
        register_all((
            mordred.AtomCount.AtomCount("X"),
            mordred.AtomCount.AtomCount("HeavyAtom"),
            mordred.Aromatic.AromaticAtomsCount,
        ))

    if desc_type in ("all", "bond"):
        register_all((
            mordred.HydrogenBond.HBondAcceptor,
            mordred.HydrogenBond.HBondDonor,
            mordred.RotatableBond.RotatableBondsCount,
            mordred.BondCount.BondCount("any", False),
            mordred.Aromatic.AromaticBondsCount,
            mordred.BondCount.BondCount("heavy", False),
            mordred.BondCount.BondCount("single", False),
            mordred.BondCount.BondCount("double", False),
            mordred.BondCount.BondCount("triple", False),
        ))

    if desc_type in ("all", "topological"):
        register_all((
            mordred.McGowanVolume.McGowanVolume,
            mordred.TopoPSA.TopoPSA(True),
            mordred.TopoPSA.TopoPSA(False),
            mordred.MoeType.LabuteASA,
            mordred.Polarizability.APol,
            mordred.Polarizability.BPol,
            mordred.AcidBase.AcidicGroupCount,
            mordred.AcidBase.BasicGroupCount,
            mordred.EccentricConnectivityIndex.EccentricConnectivityIndex,
            mordred.TopologicalCharge.TopologicalCharge("raw", 1),
            mordred.TopologicalCharge.TopologicalCharge("mean", 1),
        ))

    if desc_type in ("all", "index"):
        register_all((
            mordred.SLogP,
            mordred.BertzCT.BertzCT,
            mordred.BalabanJ.BalabanJ,
            mordred.WienerIndex.WienerIndex(True),
            mordred.ZagrebIndex.ZagrebIndex(1, 1),
            mordred.ABCIndex,
        ))

    if desc_type in ("all", "ring"):
        register_all((
            mordred.RingCount.RingCount(None, False, False, None, None),
            mordred.RingCount.RingCount(None, False, False, None, True),
            mordred.RingCount.RingCount(None, False, False, True, None),
            mordred.RingCount.RingCount(None, False, False, True, True),
            mordred.RingCount.RingCount(None, False, False, False, None),
            mordred.RingCount.RingCount(None, False, True, None, None),
        ))

    if desc_type in ("all", "estate"):
        register_all((mordred.EState,))

    if desc_names:
        # Only the names were requested; nothing is calculated.
        return [str(desc) for desc in calc1.descriptors]

    # Use the public values() accessor rather than the private _values.
    return list(calc1(mol).values())
Пример #12
0
import tkinter as tk
from tkinter import filedialog, CENTER, S
from random import randint
from Model_Training import model
from rdkit import Chem
from rdkit.Chem import Draw
from mordred import descriptors
import mordred as m
import numpy as np
import pandas as pd
from PIL import ImageTk, Image
from pubchemprops.pubchemprops import get_second_layer_props
import pubchempy as pcp

# Calculator with the contents of the `mordred.descriptors` module registered.
testcalc = m.Calculator(descriptors)
# Lookup table: string form of each registered descriptor -> the descriptor.
alldesc = {str(descript): descript for descript in testcalc.descriptors}


class Overview(tk.Tk):
    def __init__(self, *args, **kwargs):
        tk.Tk.__init__(self, *args, **kwargs)
        container = tk.Frame(self)
        self.attributes("-fullscreen", True)
        self.title("Chemical Melting Point Prediction Program")
        container.pack(side="top", fill="both", expand=True)
        container.grid_rowconfigure(0, weight=1)
        container.grid_columnconfigure(0, weight=1)
        self.frames = {}
        for F in (mainPage, predictionPage, examplesPage):
            page_name = F.__name__