Exemplo n.º 1
0
def main():
    """Compute all descriptor sets for the macrolide SMILES file.

    Reads the ``smiles`` column of ``data/macrolides_smiles.csv``,
    standardizes every entry, then runs the ECFP6, MACCS, RDKit 2D and
    mordred/macrocycle descriptor calculators in that order, handing each
    the same output file name. The elapsed time of the descriptor stage
    (measured after loading and standardization) is printed at the end.
    """
    filename = 'data/macrolides_smiles.csv'
    create_directory()
    raw_smiles = pd.read_csv(filename)['smiles'].values
    smiles = list(map(standardize_smiles, raw_smiles))

    start_time = time.time()
    # Replace the leading 'data' of the input path with 'desc'.
    output_filename = 'desc' + filename[4:]

    # ECFP6 fingerprints
    ECFP6(smiles).compute_ECFP6(output_filename)

    # MACCS fingerprints
    MACCS(smiles).compute_MACCS(output_filename)

    # RDKit 2D descriptors
    RDKit_2D(smiles).compute_2Drdkit(output_filename)

    # mordred descriptors followed by the macrocycle-specific ones; both
    # are produced by the same calculator instance.
    macrocycle = Macrocycle_Descriptors(smiles)
    macrocycle.mordred_compute(output_filename)
    macrocycle.compute_mordred_macrocycle(output_filename)

    print(convert_time(time.time() - start_time))
Exemplo n.º 2
0
 def testStandardizeSmLong(self):
     # Long-running check, skipped unless the module-level doLong flag is
     # set: every record read from dataPCS_standardize_smiles100k must
     # standardize to its expected SMILES.
     if not doLong:
         raise unittest.SkipTest('long test')
     dataset = self.readPCSdata(self.dataPCS_standardize_smiles100k)
     for data in dataset:
         try:
             standardized = molvs.standardize_smiles(data.smiles)
         except Exception:
             # Report which input record broke instead of the raw error.
             raise AssertionError(f'Line {data.lineNo}: MolVS standardization failed for SMILES {data.smiles}')
         else:
             self.assertEqual(standardized, data.expected)
Exemplo n.º 3
0
def process(smi):
    """Standardize each '.'-separated component of a SMILES string.

    Returns ``None`` when any token of the stripped input is missing from
    ``model.vocab2id``; otherwise returns a list with one standardized
    SMILES per component.
    """
    stripped = smi.strip()

    # Reject compounds containing a token the model cannot generate.
    for token in tokenize(stripped):
        if token not in model.vocab2id:
            return None

    # Standardize every component separately.
    return list(map(molvs.standardize_smiles, stripped.split('.')))
Exemplo n.º 4
0
 def testStandardizeSmLong(self):
     # Long-running check, skipped unless the module-level doLong flag is
     # set: every record read from dataPCS_standardize_smiles100k must
     # standardize to its expected SMILES.
     if not doLong:
         raise unittest.SkipTest('long test')
     records = self.readPCSdata(self.dataPCS_standardize_smiles100k)
     for record in records:
         try:
             standardized = molvs.standardize_smiles(record.smiles)
         except Exception:
             # Report which input record broke instead of the raw error.
             message = 'Line {0.lineNo}: MolVS standardization failed for SMILES {0.smiles}'.format(record)
             raise AssertionError(message)
         else:
             self.assertEqual(standardized, record.expected)
Exemplo n.º 5
0
def main():
    """Compute ECFP6 fingerprints for the macrolide SMILES file.

    Reads the ``smiles`` column of ``data/macrolides_smiles.csv``,
    standardizes every entry and passes the result to the ECFP6
    calculator together with the derived output file name.
    """
    filename = 'data/macrolides_smiles.csv'
    frame = pd.read_csv(filename)
    smiles = list(map(standardize_smiles, frame['smiles'].values))

    # Replace the leading 'data' of the input path with 'desc'.
    output_filename = 'desc' + filename[4:]

    # ECFP6 fingerprints
    ecfp6_descriptor = ECFP6(smiles)
    ecfp6_descriptor.compute_ECFP6(output_filename)
Exemplo n.º 6
0
def process(fname):
    """Filter, annotate and rewrite the compounds stored in ``fname``.

    The file is read as JSON (an iterable of mappings with a ``'smiles'``
    key). Entries are dropped when the SMILES is ``None``, fails MolVS
    validation, or — after standardization — is already contained in
    ``pubchem``. For the remaining compounds an ATC code is predicted,
    the molecular formula is computed and an image is saved under
    ``images_dir`` (named after the MD5 hex digest of the SMILES).
    Finally ``fname`` is overwritten with the list of result records.

    The integer label stored in every record is the file's base name
    with ``'.json'`` removed.
    """
    stem = os.path.basename(fname).replace('.json', '')
    label = int(stem)
    with open(fname, 'r') as f:
        entries = json.load(f)

    kept = []
    for entry in entries:
        candidate = entry['smiles']

        # Skip missing SMILES and anything MolVS reports problems for.
        if candidate is None or molvs.validate_smiles(candidate):
            continue

        # Only keep standardized SMILES that are not already known.
        standardized = molvs.standardize_smiles(candidate)
        if standardized not in pubchem:
            kept.append(standardized)

    # One prediction per kept compound, mapped to its ATC code.
    predictions = atc_model.predict(kept)
    atc_codes = [atc_lookup[prediction] for prediction in predictions]

    results = []
    for kept_smiles, atc_code in zip(kept, atc_codes):
        mol = Chem.MolFromSmiles(kept_smiles)
        formula = CalcMolFormula(mol)

        # The image file name is derived from the SMILES itself.
        digest = md5(kept_smiles.encode('utf8')).hexdigest()
        image = Draw.MolToImage(mol)
        im_path = os.path.join(images_dir, '{}.png'.format(digest))
        image.save(im_path)

        record = {
            'label': label,
            'smiles': kept_smiles,
            'formula': formula,
            'image': im_path,
            'atc_code': atc_code,
            'created_at': datetime.utcnow().isoformat()
        }
        results.append(record)

    # Replace the input file with the processed compounds.
    with open(fname, 'w') as f:
        json.dump(results, f)
Exemplo n.º 7
0
def get_cmpd_information(molec):
    """Fill ``molec.mol`` and ``molec.SMILES`` from the compound's InChI.

    When ``molec.InChi`` is ``None`` the InChI is first requested from the
    SABIO-RK ``searchCompoundDetails`` web service, using ``molec.cID`` as
    the ``SabioCompoundID``. The InChI is then converted to an RDKit
    molecule, hydrogens are removed, and the resulting SMILES is
    standardized.

    ``molec.mol`` and ``molec.SMILES`` are both set to ``None`` when

    * the service reports no result for the compound,
    * the InChI is the literal string ``'null'``,
    * the InChI cannot be converted to a molecule, or
    * standardization raises ``ValueError``.

    Args:
        molec: object with ``cID`` and ``InChi`` attributes; ``InChi``,
            ``mol`` and ``SMILES`` are written by this function.

    Raises:
        requests.HTTPError: if the web service answers with an error
            status (via ``raise_for_status``).
    """
    QUERY_URL = ('http://sabiork.h-its.org/sabioRestWebServices/'
                 'searchCompoundDetails')

    # input: SabioCompoundID
    # valid output fields: "fields[]":["Name","ChebiID",
    #                           "PubChemID","InChI",
    #                        "SabioCompoundID","KeggCompoundID"]
    params = {
        "SabioCompoundID": molec.cID,
        "fields[]": ["Name", "ChebiID", "PubChemID", "InChI"]
    }
    if molec.InChi is None:
        request = requests.post(QUERY_URL, params=params)
        request.raise_for_status()
        if request.text != 'No results found for query':
            # The second line of the response holds the tab-separated
            # values for the four requested fields; InChI is the last.
            txt = request.text.split('\n')[1].split('\t')
            _, _, _, molec.InChi = txt

    # No usable InChI. Previously a "no results" answer left InChi as
    # None, which passed the ``!= 'null'`` check and was handed to the
    # InChI converter; None is now treated like 'null'.
    if molec.InChi is None or molec.InChi == 'null':
        molec.mol = None
        molec.SMILES = None
        return

    print('collect SMILES from SABIO InChi')
    molec.mol = get_rdkit_mol_from_InChi(molec.InChi)
    if molec.mol is None:
        molec.SMILES = None
        return

    # Keep the raw SMILES on the object before attempting to standardize,
    # so it survives if standardization fails with a non-ValueError.
    molec.SMILES = Chem.MolToSmiles(Chem.RemoveHs(molec.mol))
    try:
        molec.SMILES = standardize_smiles(molec.SMILES)
    except ValueError:
        print('standardization failed - therefore assume')
        print('SMILES were invalid - skip')
        molec.SMILES = None
        molec.mol = None
Exemplo n.º 8
0
import json
import molvs
import random
import policies
from tqdm import tqdm
from mcts import Node, mcts
import tensorflow as tf
from rdkit import Chem
from rdkit.Chem import AllChem

# Load base compounds: each line of the file is stripped and standardized;
# the set collapses lines that standardize to the same SMILES.
starting_mols = set()
with open('data/emolecules.smi', 'r') as f:
    for line in tqdm(f, desc='Loading base compounds'):
        smi = molvs.standardize_smiles(line.strip())
        starting_mols.add(smi)
print('Base compounds:', len(starting_mols))

# Load the rule sets; their sizes determine the policy network outputs.
with open('model/rules.json', 'r') as f:
    rules = json.load(f)
rollout_rules = rules['rollout']
expansion_rules = rules['expansion']

# Policy networks (construction order kept: rollout, expansion, filter).
rollout_net = policies.RolloutPolicyNet(n_rules=len(rollout_rules))
expansion_net = policies.ExpansionPolicyNet(n_rules=len(expansion_rules))
filter_net = policies.InScopeFilterNet()

# TensorFlow session and the op that initializes all global variables.
sess = tf.Session()
init = tf.global_variables_initializer()
Exemplo n.º 9
0
    def __init__(self,
                 dcompound,
                 logfile,
                 writecheck=1,
                 kSMILES="CANONICAL_SMILES",
                 kID="CMPD_CHEMBLID"):
        """Clean one compound record and load it as a molecule.

        The SMILES stored under ``kSMILES`` (or derived from
        ``dcompound["sdf"]`` when that key is absent) is standardized,
        stripped of salts and reduced to a single fragment. On success the
        cleaned SMILES replaces ``dcompound[kSMILES]``, ``self.log`` is set
        to ``"OK"`` and ``self.mol`` is read from the cleaned SMILES. On
        any failure a tab-separated line is written to ``logfile``,
        ``self.log`` is set to ``"ERROR"`` and the constructor returns
        early, leaving ``self.mol`` unset.

        Args:
            dcompound: mapping describing the compound; kept as
                ``self.compound`` and modified in place.
            logfile: object with a ``write`` method receiving error lines.
            writecheck: when equal to 1, the cleaned SMILES (and the SDF
                text, if present) are written to files whose paths start
                with ``pathFolder.PR_COMPOUNDS``.
            kSMILES: key of the SMILES entry in ``dcompound``.
            kID: key of the compound identifier in ``dcompound``.
        """
        self.compound = dcompound
        # Only used at the very end to read the cleaned SMILES.
        loader = pydrug.PyDrug()

        # No SMILES entry: derive one from the SDF text instead.
        if not kSMILES in dcompound.keys():
            try:
                smile = runExternalSoft.babelConvertSDFtoSMILE(
                    dcompound["sdf"])
                self.compound[kSMILES] = smile
            # NOTE(review): bare except - also hides a missing "sdf" key.
            except:
                print "ERROR INPUT SDF - l33"
                self.log = "ERROR"
                # Best-effort logging: a missing ID key or a failing write
                # is silently ignored.
                try:
                    logfile.write(self.compound[kID] +
                                  "\t---\tERROR-SDF ORIGINAL INPUT\n")
                except:
                    pass
                return

        # Standardize the SMILES string; any failure is logged against the
        # compound ID and aborts construction.
        try:
            smilestandadized = standardize_smiles(self.compound[kSMILES])
        except:
            logfile.write(self.compound[kID] + "\t" +
                          str(self.compound[kSMILES]) + "\tERROR-SMILES INPUT"
                          "\n")
            self.log = "ERROR"
            return

        # Second standardization pass on the molecule object, using molvs
        # (http://molvs.readthedocs.io/en/latest/api.html#molvs-fragment)
        s = Standardizer()
        mol = Chem.MolFromSmiles(smilestandadized)
        molstandardized = s.standardize(mol)
        smilestandadized = Chem.MolToSmiles(molstandardized)

        # Remove salts
        # 1. SaltRemover with its default definitions
        remover = SaltRemover()
        mol = Chem.MolFromSmiles(smilestandadized)
        molcleandefault = remover(mol)
        # 2. SaltRemover with the custom definitions in LSALT
        homeremover = SaltRemover(defnData=LSALT)
        molclean = homeremover(molcleandefault)
        smilesclean = Chem.MolToSmiles(molclean)
        # 3. Work on the "."-separated fragments: drop the fragments listed
        #    in LSMILESREMOVE; if exactly one distinct fragment is left it
        #    becomes the compound.
        lelem = smilesclean.split(".")
        if len(lelem) > 1:
            # Collapse duplicate fragments (several copies of the same salt
            # or compound) - original note referenced "255".
            lelem = list(set(lelem))
            for smilesdel in LSMILESREMOVE:
                if smilesdel in lelem:
                    lelem.remove(smilesdel)
            try:
                lelem.remove("")  # empty fragment left by a malformed SMILES
            except:
                pass
            if len(lelem) == 1:
                smilesclean = str(lelem[0])
            else:
                # 4. Anything other than exactly one remaining fragment
                #    (several, or none) is recorded in the log file for
                #    later manual checking and treated as an error.
                logfile.write(self.compound[kID] + "\t" +
                              str(self.compound[kSMILES]) +
                              "\tFRAGMENT IN INPUT"
                              "\n")
                print ".".join(lelem), " - FRAGMENTS - l66"
                self.log = "ERROR"
                return
        else:
            pass

        print self.compound[kSMILES], "SMILES IN - l25 liganddescriptors"
        print smilesclean, "SMILES without salt and standardized"

        # Nothing left after salt removal: the input contained only salts.
        if smilesclean == "":
            logfile.write(self.compound[kID] + "\t" +
                          str(self.compound[kSMILES]) + "\tEMPTY SMILES AFTER "
                          "STANDARDIZATION\n")
            print "EMPTY SMILES AFTER STANDARDIZATION - l84"
            self.log = "ERROR"
            return

        # Success: store the cleaned SMILES back on the compound record.
        self.compound[kSMILES] = smilesclean
        self.log = "OK"

        if writecheck == 1:
            # Write the cleaned SMILES to <PR_COMPOUNDS><ID>.smi
            # (paths are built by plain concatenation - presumably
            # PR_COMPOUNDS ends with a path separator; TODO confirm).
            pfileSMILES = pathFolder.PR_COMPOUNDS + str(
                dcompound[kID]) + ".smi"
            fileSMILES = open(pfileSMILES, "w")
            fileSMILES.write(self.compound[kSMILES])
            fileSMILES.close()

            # Also keep the original SDF text when the record has one.
            if "sdf" in self.compound.keys():
                pfileSDF = pathFolder.PR_COMPOUNDS + str(
                    dcompound[kID]) + ".sdf"
                fileSDF = open(pfileSDF, "w")
                fileSDF.write(self.compound["sdf"])
                fileSDF.close()

        # Read the molecule from the cleaned SMILES.
        self.mol = loader.ReadMolFromSmile(self.compound[kSMILES])
Exemplo n.º 10
0
 def testStandardizeSmShort(self):
     # Every record read from dataPCS_standardize_smiles1k must
     # standardize to its expected SMILES.
     records = self.readPCSdata(self.dataPCS_standardize_smiles1k)
     for record in records:
         standardized = molvs.standardize_smiles(record.smiles)
         self.assertEqual(standardized, record.expected)
Exemplo n.º 11
0
 def testStandardizeSmShort(self):
     # Every record read from dataPCS_standardize_smiles1k must
     # standardize to its expected SMILES.
     dataset = self.readPCSdata(self.dataPCS_standardize_smiles1k)
     for entry in dataset:
         result = molvs.standardize_smiles(entry.smiles)
         self.assertEqual(result, entry.expected)