Пример #1
0
    def testAppend(self):
        """Create a store from one SMILES file, append a second, verify both.

        The store is first built from ``one_smiles`` and checked for a single
        row.  ``two_smiles`` is then appended through ``append_store`` and the
        store is opened again to confirm that rows 0 and 1 resolve by name,
        are both listed under the same InChIKey, and carry the same
        descriptor tuple.
        """
        inchikey = "UHOVQNZJYSORNB-UHFFFAOYSA-N"
        expected_row = (True, 78.046950192, 0.0, 1.0, 0.0, 1.0)
        try:
            fname = tempfile.mktemp() + ".smi"
            fname2 = tempfile.mktemp() + "-2.smi"

            storefname = tempfile.mktemp() + ".store"
            with open(fname, 'w') as smiles_out:
                smiles_out.write(one_smiles)

            settings = {
                "storage": storefname,
                "smilesfile": fname,
                "hasHeader": False,
                "batchsize": 1,
                "smilesColumn": 0,
                "nameColumn": 1,
                "seperator": " ",
                "descriptors": "RDKit2DSubset",
                "index_inchikey": True,
            }
            opts = make_store.MakeStorageOptions(**settings)
            make_store.make_store(opts)

            with contextlib.closing(DescriptaStore(storefname)) as store:
                self.assertEqual(store.lookupName("0"), 0)
                self.assertEqual(store.lookupInchiKey(inchikey), [0])
                self.assertEqual(store.descriptors().get(0), expected_row)

            # Second input file: its rows are appended to the existing store.
            with open(fname2, 'w') as smiles_out:
                smiles_out.write(two_smiles)

            opts.smilesfile = fname2
            append_store.append_store(opts)
            with contextlib.closing(DescriptaStore(storefname)) as store:
                self.assertEqual(len(store), 2)
                # The same three checks are made for each row, in row order.
                for row in (0, 1):
                    self.assertEqual(store.lookupName(str(row)), row)
                    self.assertEqual(store.lookupInchiKey(inchikey), [0, 1])
                    self.assertEqual(store.descriptors().get(row),
                                     expected_row)

        finally:
            # Remove whatever temporary artifacts were actually created.
            if os.path.exists(fname):
                os.unlink(fname)
            if os.path.exists(fname2):
                os.unlink(fname2)
            if os.path.exists(storefname):
                shutil.rmtree(storefname)
Пример #2
0
    def testCanonicalSmiles2(self):
        """Store "Canonicalize" descriptors and check the first field of rows 0-9.

        After sorting, the first field of the ten rows must equal
        ``list(range(8, 18))``.
        """
        try:
            fname = tempfile.mktemp() + ".smi"
            storefname = tempfile.mktemp() + ".store"
            with open(fname, 'w') as smiles_out:
                smiles_out.write(many_smiles)

            settings = dict(storage=storefname,
                            smilesfile=fname,
                            hasHeader=False,
                            smilesColumn=0,
                            nameColumn=1,
                            seperator=" ",
                            descriptors="Canonicalize",
                            index_inchikey=False)
            make_store.make_store(make_store.MakeStorageOptions(**settings))

            with contextlib.closing(DescriptaStore(storefname)) as store:
                # Collect field 0 of every row; row order is not asserted,
                # only the sorted set of values.
                first_fields = sorted(
                    store.descriptors().get(row)[0] for row in range(10))
                self.assertEqual(first_fields, list(range(8, 18)))

        finally:
            # Remove whatever temporary artifacts were actually created.
            if os.path.exists(fname):
                os.unlink(fname)
            if os.path.exists(storefname):
                shutil.rmtree(storefname)
 def testChiralMorgans(self):
     """Store "MorganChiral3Counts" descriptors and check rows 0-9.

     Each of the first ten stored rows must equal the matching entry of
     ``expected_chiral_data``.
     """
     try:
         fname = tempfile.mktemp() + ".smi"
         storefname = tempfile.mktemp() + ".store"
         with open(fname, 'w') as f:
             f.write(many_smiles)

         opts = make_store.MakeStorageOptions(
             storage=storefname,
             smilesfile=fname,
             hasHeader=False,
             smilesColumn=0,
             nameColumn=1,
             seperator=" ",
             descriptors="MorganChiral3Counts",
             index_inchikey=True)
         make_store.make_store(opts)

         # The lookup raises KeyError when the generator is not registered;
         # the generator object itself is not needed by the checks below.
         self.assertIsNotNone(
             DescriptorGenerator.REGISTRY["MorganChiral3Counts".lower()])

         with contextlib.closing(DescriptaStore(storefname)) as store:
             for i in range(10):
                 r = store.descriptors().get(i)
                 self.assertEqual(r, expected_chiral_data[i])

     finally:
         # Remove whatever temporary artifacts were actually created.
         if os.path.exists(fname):
             os.unlink(fname)
         if os.path.exists(storefname):
             shutil.rmtree(storefname)
    def testNonesWithCalcFlags(self):
        """Store "NANDescriptorsWithCalcFlags" and check row 0's first field is falsy."""
        try:
            fname = tempfile.mktemp() + ".smi"
            storefname = tempfile.mktemp() + ".store"
            with open(fname, 'w') as smiles_out:
                smiles_out.write(one_smiles)

            settings = {
                "storage": storefname,
                "smilesfile": fname,
                "hasHeader": False,
                "smilesColumn": 0,
                "nameColumn": 1,
                "seperator": " ",
                "descriptors": "NANDescriptorsWithCalcFlags",
                "index_inchikey": True,
            }
            make_store.make_store(make_store.MakeStorageOptions(**settings))

            with contextlib.closing(DescriptaStore(storefname)) as store:
                first_row = store.descriptors().get(0)
                self.assertFalse(first_row[0])

        finally:
            # Remove whatever temporary artifacts were actually created.
            if os.path.exists(fname):
                os.unlink(fname)
            if os.path.exists(storefname):
                shutil.rmtree(storefname)
    def testRDKitFPBits(self):
        """Store "RDKitFPBits" descriptors for ``testSmiles`` and check rows 0-3.

        The input file holds one "<value> <key>" line per ``testSmiles`` item;
        each of the first four stored rows must equal the matching entry of
        ``expected_RDKFP_data``.
        """
        try:
            fname = tempfile.mktemp() + ".smi"
            storefname = tempfile.mktemp() + ".store"
            with open(fname, 'w') as smiles_out:
                # Value first, key second: matches smilesColumn=0 and
                # nameColumn=1 in the options below.
                lines = ['{0} {1}'.format(value, key)
                         for key, value in testSmiles.items()]
                smiles_out.write("\n".join(lines))

            settings = dict(storage=storefname,
                            smilesfile=fname,
                            hasHeader=False,
                            smilesColumn=0,
                            nameColumn=1,
                            seperator=" ",
                            descriptors="RDKitFPBits",
                            index_inchikey=False)
            make_store.make_store(make_store.MakeStorageOptions(**settings))

            with contextlib.closing(DescriptaStore(storefname)) as store:
                for row in range(4):
                    stored = store.descriptors().get(row)
                    self.assertEqual(stored, expected_RDKFP_data[row])

        finally:
            # Remove whatever temporary artifacts were actually created.
            if os.path.exists(fname):
                os.unlink(fname)
            if os.path.exists(storefname):
                shutil.rmtree(storefname)
Пример #6
0
    def testNormalized(self):
        """Store "RDKit2DNormalized" descriptors and check rows 0-9.

        Each of the first ten stored rows is passed, together with the
        matching entry of ``expected``, to ``compare_results``.
        """
        try:
            fname = tempfile.mktemp() + ".smi"
            storefname = tempfile.mktemp() + ".store"
            with open(fname, 'w') as f:
                f.write(many_smiles)

            opts = make_store.MakeStorageOptions(
                storage=storefname,
                smilesfile=fname,
                hasHeader=False,
                smilesColumn=0,
                nameColumn=1,
                seperator=" ",
                descriptors="RDKit2DNormalized",
                index_inchikey=True)
            make_store.make_store(opts)

            # The lookup raises KeyError when the generator is not registered;
            # the generator object itself is not needed by the checks below.
            self.assertIsNotNone(
                DescriptorGenerator.REGISTRY["RDKit2DNormalized".lower()])

            with contextlib.closing(DescriptaStore(storefname)) as store:
                for i in range(10):
                    r = store.descriptors().get(i)
                    compare_results(self, r, expected[i])

        finally:
            # Remove whatever temporary artifacts were actually created.
            if os.path.exists(fname):
                os.unlink(fname)
            if os.path.exists(storefname):
                shutil.rmtree(storefname)
Пример #7
0
    def testManyNoInchi(self):
        """Build an "RDKit2DSubset" store without an InChIKey index and verify it.

        Checks name lookup for rows 0-9, the descriptor tuples of rows 0-10,
        and that the stored SMILES and name of rows 0-9 match the input lines.
        """
        # Expected descriptor tuples, indexed by row; only the second field
        # differs between rows 0-9, and row 10 is the all-zero "False" row.
        expected_rows = [
            (True, second_field, 0.0, 1.0, 0.0, 1.0)
            for second_field in (78.046950192, 92.062600256, 106.07825032,
                                 120.093900384, 134.109550448, 148.125200512,
                                 162.140850576, 176.15650064, 190.172150704,
                                 204.187800768)
        ]
        expected_rows.append((False, 0.0, 0.0, 0.0, 0.0, 0.0))
        try:
            fname = tempfile.mktemp() + ".smi"
            storefname = tempfile.mktemp() + ".store"
            with open(fname, 'w') as smiles_out:
                smiles_out.write(many_smiles)

            settings = dict(storage=storefname,
                            smilesfile=fname,
                            hasHeader=False,
                            batchsize=1,
                            smilesColumn=0,
                            nameColumn=1,
                            seperator=" ",
                            descriptors="RDKit2DSubset",
                            index_inchikey=False)
            make_store.make_store(make_store.MakeStorageOptions(**settings))

            source_lines = many_smiles.split("\n")

            with contextlib.closing(DescriptaStore(storefname)) as store:
                for row in range(10):
                    self.assertEqual(store.lookupName(str(row)), row)

                for row, expected_row in enumerate(expected_rows):
                    self.assertEqual(store.descriptors().get(row),
                                     expected_row)

                for row in range(10):
                    # The molecule is fetched but not inspected; the call is
                    # kept so any error it raises still fails the test.
                    store.molIndex().getRDMol(row)
                    stored_smiles, stored_name = store.molIndex().get(row)
                    self.assertEqual(stored_name, str(row))
                    self.assertEqual(stored_smiles,
                                     source_lines[row].split()[0])

        finally:
            # Remove whatever temporary artifacts were actually created.
            if os.path.exists(fname):
                os.unlink(fname)
            if os.path.exists(storefname):
                shutil.rmtree(storefname)
Пример #8
0
    def testColCache(self):
        """Exercise the column cache of the store's underlying ``db``.

        Steps:
          1. read every column before caching,
          2. call ``cacheColumns()`` and check one cache file exists per
             column in ``colCacheDir``,
          3. re-read every column and compare with step 1,
          4. swap the cache files of columns 0 and 1 and expect reading
             column 0 to raise ``struct.error``.
        """
        try:
            fname = tempfile.mktemp() + ".smi"
            storefname = tempfile.mktemp() + ".store"
            with open(fname, 'w') as f:
                f.write(many_smiles)

            opts = make_store.MakeStorageOptions(storage=storefname,
                                                 smilesfile=fname,
                                                 hasHeader=False,
                                                 smilesColumn=0,
                                                 nameColumn=1,
                                                 seperator=" ",
                                                 descriptors="RDKit2DSubset",
                                                 index_inchikey=True)
            make_store.make_store(opts)

            with contextlib.closing(DescriptaStore(storefname)) as store:
                cols = []
                # get normal data
                for idx, _ in enumerate(store.db.colnames):
                    col = list(store.db.getColByIdx(idx))
                    cols.append(col)

                # cache the columns
                store.db.cacheColumns()
                # make sure the datafiles are written
                for idx, _ in enumerate(store.db.colnames):
                    fn = os.path.join(store.db.colCacheDir, str(idx))
                    self.assertTrue(os.path.exists(fn), fn)

                for idx, _ in enumerate(store.db.colnames):
                    col = list(store.db.getColByIdx(idx))
                    self.assertEqual(col, cols[idx])

                # swap the cache files of columns 0 and 1
                fn0 = os.path.join(store.db.colCacheDir, str(0))
                fn1 = os.path.join(store.db.colCacheDir, str(1))
                shutil.move(fn0, fn0 + ".bak")
                shutil.move(fn1, fn0)
                shutil.move(fn0 + ".bak", fn1)

                try:
                    list(store.db.getColByIdx(0))
                    # Reaching this line means the swapped file was read
                    # without error, which the assertion below reports.
                    caught = False
                except struct.error:
                    caught = True

                self.assertTrue(
                    caught, "moving cache file should have broken the cache")

        finally:
            # Remove whatever temporary artifacts were actually created.
            if os.path.exists(fname):
                os.unlink(fname)
            if os.path.exists(storefname):
                shutil.rmtree(storefname)
Пример #9
0
def main():
    """Validate stored descriptors by recomputing them from the stored molecules.

    Parses the command line with the module-level ``parser``, opens the store
    named by ``opts.storage`` and, for each sampled row, compares the stored
    descriptor tuple with the output of the store's descriptor calculator.
    NaN entries on either side are replaced by ``None`` before comparing,
    because NaN never compares equal to itself.

    With ``opts.samples == -1`` every row is checked in order; otherwise
    ``opts.samples`` rows are drawn at random (the same row may be drawn more
    than once).  Progress is printed in 5% steps.

    Raises:
        AssertionError: if a stored row differs from the recomputed values, or
            if the calculator returns ``None`` for a molecule whose stored row
            is not all zeros.
    """
    opts = parser.parse_args()
    store = DescriptaStore(opts.storage)

    N = len(store)
    gen = store.getDescriptorCalculator()

    randomize = opts.samples != -1
    if not randomize:
        opts.samples = N

    next_report = .05
    for i in range(opts.samples):
        if i and float(i) / opts.samples > next_report:
            print("Validated %2.2f%%" % (next_report * 100))
            next_report += .05
        idx = random.randint(0, N - 1) if randomize else i

        v = store.descriptors().get(idx)
        smiles = store.molIndex().getMol(idx)
        try:
            name = store.molIndex().getName(idx)
        except Exception:
            # Best effort: the name is only used in the failure message
            # below.  ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate.
            name = None

        res = gen.process(smiles)
        if res is None:
            # Nothing could be computed: the stored row must be all zeros.
            # Raised explicitly so the check survives ``python -O``.
            if v != tuple([0] * len(v)):
                raise AssertionError(
                    "idx:%s smiles:%s name:%s \n%r" % (idx, smiles, name, v))
            continue

        # Reuse ``res`` instead of running the calculator a second time.
        data = [None if math.isnan(x) else x for x in res]
        v2 = [None if math.isnan(x) else x for x in v]
        if v2 != data:
            raise AssertionError("idx:%s smiles:%s name:%s \n%r\n\t%r" % (
                idx, smiles, name, v, data))
Пример #10
0
    def testOffByOne(self):
        """Build a one-row store, verify the row, then look up an unknown InChIKey.

        The unknown-key lookup must raise ``KeyError``; any other outcome
        fails the test.
        """
        inchikey = "UHOVQNZJYSORNB-UHFFFAOYSA-N"
        try:
            fname = tempfile.mktemp() + ".smi"
            storefname = tempfile.mktemp() + ".store"
            with open(fname, 'w') as smiles_out:
                smiles_out.write(one_smiles)

            settings = {
                "storage": storefname,
                "smilesfile": fname,
                "hasHeader": False,
                "batchsize": 1,
                "smilesColumn": 0,
                "nameColumn": 1,
                "seperator": " ",
                "descriptors": "RDKit2DSubset",
                "index_inchikey": True,
            }
            make_store.make_store(make_store.MakeStorageOptions(**settings))

            with contextlib.closing(DescriptaStore(storefname)) as store:
                self.assertEqual(store.lookupName("0"), 0)
                self.assertEqual(store.lookupInchiKey(inchikey), [0])
                self.assertEqual(store.descriptors().get(0),
                                 (True, 78.046950192, 0.0, 1.0, 0.0, 1.0))

            # NOTE(review): this lookup runs after the ``with`` block above
            # has closed the store -- confirm that is intentional.
            try:
                store.lookupInchiKey("MY DOG HAS FLEAS")
            except KeyError:
                pass
            else:
                self.assertTrue(False)  # should not get here

        finally:
            # Remove whatever temporary artifacts were actually created.
            if os.path.exists(fname):
                os.unlink(fname)
            if os.path.exists(storefname):
                shutil.rmtree(storefname)
Пример #11
0
# NOTE(review): with action="store_true" and default=True this flag is True
# whether or not it is given on the command line -- confirm that is intended.
parser.add_argument("--output-name", action="store_true", default=True,
                    help="outputs the molecule name (if available)")

# to do?
#parser.add_argument("--output-smiles", action="store_true",
#                    help="outputs the smiles column")
#parser.add_argument("--output-inchi", action="store_true",
#                    help="outputs an inchi column (if available)")

parser.add_argument("--verbose",  action="store_true",
                    help="Verbose logging")

opts = parser.parse_args()

store = DescriptaStore(opts.storage)

# Pick the writer for stdout from the requested separator.
if opts.seperator == ",":
    writer = csv.writer(sys.stdout)
elif opts.seperator == "\t":
    # The keyword is ``dialect``; the previous spelling raised TypeError.
    writer = csv.writer(sys.stdout, dialect=csv.excel_tab)
else:
    # Any other separator previously left ``writer`` unbound and crashed
    # with NameError on first use; pass it to csv as the delimiter instead.
    writer = csv.writer(sys.stdout, delimiter=opts.seperator)

# Keeping rows with missing descriptors implies keeping the calculated flags.
if opts.keep_missing_descriptors:
    opts.keep_calculatedflags = True

# Header row: descriptor names, optionally preceded by a "Name" column.
if opts.output_name:
    writer.writerow(['Name'] + store.getDescriptorNames(opts.keep_calculatedflags))
else:
    writer.writerow(store.getDescriptorNames(opts.keep_calculatedflags))

# Without a name file, every row of the store is selected.
if not opts.namefile:
    indices = range(len(store))
Пример #12
0
    def testAppend(self):
        """Build a store from ``many_smiles``, append ``many_smiles2``, verify both halves.

        Before the append, rows 0-9 must resolve by name and by InChIKey and
        rows 0-10 must hold the expected descriptor tuples.  After the append,
        row ``i + 11`` must have the same InChIKey as row ``i`` (for i in
        0-9), each key must map to ``[i, i + 11]``, and rows 11-21 must hold
        the same descriptor tuples as rows 0-10.
        """
        # Expected descriptor tuples, indexed by row of the first input file;
        # the last entry is the all-zero "False" row.
        expected_rows = [
            (True, 78.046950192, 0.0, 1.0, 0.0, 1.0),
            (True, 92.062600256, 0.0, 1.0, 0.0, 1.0),
            (True, 106.07825032, 0.0, 1.0, 0.0, 1.0),
            (True, 120.093900384, 0.0, 1.0, 0.0, 1.0),
            (True, 134.109550448, 0.0, 1.0, 0.0, 1.0),
            (True, 148.125200512, 0.0, 1.0, 0.0, 1.0),
            (True, 162.140850576, 0.0, 1.0, 0.0, 1.0),
            (True, 176.15650064, 0.0, 1.0, 0.0, 1.0),
            (True, 190.172150704, 0.0, 1.0, 0.0, 1.0),
            (True, 204.187800768, 0.0, 1.0, 0.0, 1.0),
            (False, 0.0, 0.0, 0.0, 0.0, 0.0),
        ]
        # Index of the first appended row: the first file fills rows 0-10.
        appended_start = 11
        try:
            fname = tempfile.mktemp() + ".smi"
            # Separate name for the second input so both files get removed;
            # reusing ``fname`` left the first temporary file behind.
            fname2 = tempfile.mktemp() + "-2.smi"
            storefname = tempfile.mktemp() + ".store"
            with open(fname, 'w') as f:
                f.write(many_smiles)

            opts = make_store.MakeStorageOptions(storage=storefname,
                                                 smilesfile=fname,
                                                 hasHeader=False,
                                                 smilesColumn=0,
                                                 nameColumn=1,
                                                 seperator=" ",
                                                 descriptors="RDKit2DSubset",
                                                 index_inchikey=True)
            make_store.make_store(opts)

            with contextlib.closing(DescriptaStore(storefname)) as store:

                for i in range(10):
                    self.assertEqual(store.lookupName(str(i)), i)

                for i in range(10):
                    m = store.molIndex().getRDMol(i)
                    key = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                    self.assertEqual(store.lookupInchiKey(key), [i])

                for i, expected_row in enumerate(expected_rows):
                    self.assertEqual(store.descriptors().get(i), expected_row)

            with open(fname2, 'w') as f:
                f.write(many_smiles2)

            opts.smilesfile = fname2
            append_store.append_store(opts)
            with contextlib.closing(DescriptaStore(storefname)) as store:
                for i in range(10):
                    self.assertEqual(store.lookupName(str(i)), i)

                for i in range(10):
                    m = store.molIndex().getRDMol(i)
                    key = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                    m = store.molIndex().getRDMol(i + appended_start)
                    self.assertIsNotNone(m)
                    key2 = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                    self.assertEqual(key, key2)
                    self.assertEqual(store.lookupInchiKey(key),
                                     [i, i + appended_start])

                # NOTE(review): the original ran this whole group of checks
                # twice with an unused loop variable; both passes are kept in
                # case repeated reads were the point -- confirm.
                for _ in range(2):
                    for i, expected_row in enumerate(expected_rows):
                        self.assertEqual(
                            store.descriptors().get(appended_start + i),
                            expected_row)

        finally:
            # Remove whatever temporary artifacts were actually created.
            if os.path.exists(fname):
                os.unlink(fname)
            if os.path.exists(fname2):
                os.unlink(fname2)
            if os.path.exists(storefname):
                shutil.rmtree(storefname)
Пример #13
0
    def testMany(self):
        """Build an "RDKit2DSubset" store from ``many_smiles`` and verify its contents.

        Checks name lookup for rows 0-9, the descriptor tuples of rows 0-10,
        the dictionary form of row 7, and that for rows 0-9 the InChIKey maps
        back to the row and the stored tuple equals what the store's own
        descriptor calculator produces for the molecule's SMILES.
        """
        # Expected descriptor tuples, indexed by row; only the second field
        # differs between rows 0-9, and row 10 is the all-zero "False" row.
        expected_rows = [
            (True, second_field, 0.0, 1.0, 0.0, 1.0)
            for second_field in (78.046950192, 92.062600256, 106.07825032,
                                 120.093900384, 134.109550448, 148.125200512,
                                 162.140850576, 176.15650064, 190.172150704,
                                 204.187800768)
        ]
        expected_rows.append((False, 0.0, 0.0, 0.0, 0.0, 0.0))
        try:
            fname = tempfile.mktemp() + ".smi"
            storefname = tempfile.mktemp() + ".store"
            with open(fname, 'w') as smiles_out:
                smiles_out.write(many_smiles)

            settings = dict(storage=storefname,
                            smilesfile=fname,
                            hasHeader=False,
                            smilesColumn=0,
                            nameColumn=1,
                            seperator=" ",
                            descriptors="RDKit2DSubset",
                            index_inchikey=True)
            make_store.make_store(make_store.MakeStorageOptions(**settings))

            with contextlib.closing(DescriptaStore(storefname)) as store:

                for row in range(10):
                    self.assertEqual(store.lookupName(str(row)), row)

                for row, expected_row in enumerate(expected_rows):
                    self.assertEqual(store.descriptors().get(row),
                                     expected_row)

                self.assertEqual(store.descriptors().getDict(7),
                                 toDict(expected_rows[7]))

                calculator = store.getDescriptorCalculator()

                for row in range(10):
                    mol = store.molIndex().getRDMol(row)
                    mol_smiles = AllChem.MolToSmiles(mol)
                    key = AllChem.InchiToInchiKey(AllChem.MolToInchi(mol))
                    self.assertEqual(store.lookupInchiKey(key), [row])
                    stored = store.descriptors().get(row)
                    recomputed = tuple(calculator.process(mol_smiles))
                    self.assertEqual(stored, recomputed)

        finally:
            # Remove whatever temporary artifacts were actually created.
            if os.path.exists(fname):
                os.unlink(fname)
            if os.path.exists(storefname):
                shutil.rmtree(storefname)
from descriptastorus import DescriptaStore
import argparse, os, logging, shutil, time, random

import sys
from rdkit import rdBase
rdBase.DisableLog("rdApp.*")


# Command line: two positional arguments, each passed to DescriptaStore below.
parser = argparse.ArgumentParser()
parser.add_argument("storage",
                    help="directory in which to store the descriptors")
# NOTE(review): this help text is identical to the one for "storage".
parser.add_argument("storage2",
                    help="directory in which to store the descriptors")

opts = parser.parse_args()
store1 = DescriptaStore(opts.storage)
store2 = DescriptaStore(opts.storage2)

# Compare the descriptor name lists of the two stores and log how they differ.
if store1.getDescriptorNames() != store2.getDescriptorNames():
    logging.warning("Storages are not compatible, columns are different")
    s1 = set(store1.getDescriptorNames())
    s2 = set(store2.getDescriptorNames())
    
    if s1 == s2:
        # Same set of names, so the lists can only differ in order
        # (or in repeated names, which a set cannot show).
        logging.warning("Columns are the same but reordered")
    else:
        # Report the names that appear in only one of the two stores.
        logging.warning("Extra columns in store1:\n\t%s", "\n\t".join(s1-s2))
        logging.warning("Extra columns in store2:\n\t%s", "\n\t".join(s2-s1))
        
    for i,(a,b) in enumerate(zip(store1.getDescriptorNames(),
                                 store2.getDescriptorNames())):