def DBToData(dbName, tableName, user='******', password='******', dupCol=-1, what='*', where='',
             join='', pickleCol=-1, pickleClass=None, ensembleIds=None):
    """ builds an _MLData.MLDataSet_ from the rows of a database table

      **Arguments**

        - dbName: the name of the database to be opened

        - tableName: the name of the table holding the data

        - user: the user name used to connect to the database

        - password: the password used to connect to the database

        - dupCol: passed to the query as its _removeDups_ argument

        - what: the fields to retrieve (also used for the column-name lookup)

        - where: where clause handed to the query

        - join: join clause handed to the query (and to the column-name lookup)

        - pickleCol: when >=0, the position (counted after the leading
          point-name column has been removed) of a column holding serialized
          data that is decoded in place

        - pickleClass: optional callable tried first when decoding the pickle
          column; after its first failure all remaining rows are decoded with
          cPickle only

        - ensembleIds: optional ids used to reduce each row: through
          BitUtils.ConstructEnsembleBV on the decoded pickle column, or, when
          there is no pickle column, through TakeEnsemble on the whole row

      **Returns**

         an _MLData.MLDataSet_; the first column of every row is used as the
         point name

      **Notes**

        - this uses Dbase.DataUtils functionality

    """
    conn = DbConnect(dbName, tableName, user, password)
    rows = conn.GetData(fields=what, where=where, join=join, removeDups=dupCol, forceList=1)
    vals = []
    ptNames = []
    classWorks = True
    for row in rows:
        fields = list(row)
        ptNames.append(fields.pop(0))
        if pickleCol < 0:
            if ensembleIds:
                fields = TakeEnsemble(fields, ensembleIds, isDataVect=True)
        else:
            if pickleClass and classWorks:
                try:
                    fields[pickleCol] = pickleClass(str(fields[pickleCol]))
                except Exception:
                    # the class cannot read this payload: unpickle instead and
                    # stop trying the class for the remaining rows
                    fields[pickleCol] = cPickle.loads(str(fields[pickleCol]))
                    classWorks = False
            else:
                fields[pickleCol] = cPickle.loads(str(fields[pickleCol]))
            if ensembleIds:
                fields[pickleCol] = BitUtils.ConstructEnsembleBV(fields[pickleCol], ensembleIds)
        vals.append(fields)
    varNames = conn.GetColumnNames(join=join, what=what)
    return MLData.MLDataSet(vals, varNames=varNames, ptNames=ptNames)
def testPointPickles(self):
    # Round-trip a 3D point and then a 2D point through cPickle and check
    # that every coordinate is preserved to within 1e-6.

    def roundTrip(pt, axes):
        clone = cPickle.loads(cPickle.dumps(pt))
        for axis in axes:
            self.assertTrue(feq(getattr(pt, axis), getattr(clone, axis), 1e-6))

    roundTrip(geom.Point3D(2.0, -3.0, 1.0), 'xyz')
    roundTrip(geom.Point2D(2.0, -4.0), 'xy')
def testPointPickles(self):
    # Pickle round trip for geom.Point3D followed by geom.Point2D; each
    # coordinate of the copy must match the original to within 1e-6.
    original = geom.Point3D(2.0, -3.0, 1.0)
    restored = cPickle.loads(cPickle.dumps(original))
    for name in ('x', 'y', 'z'):
        self.assertTrue(feq(getattr(original, name), getattr(restored, name), 1e-6))

    original = geom.Point2D(2.0, -4.0)
    restored = cPickle.loads(cPickle.dumps(original))
    for name in ('x', 'y'):
        self.assertTrue(feq(getattr(original, name), getattr(restored, name), 1e-6))
def testPkl2(self):
    """ further pickle tests """
    # Every SMILES in self.bigSmiList is pickled twice in a row; atom counts
    # must match the source molecule and both copies must give the same SMILES.
    for smi in self.bigSmiList:
        mol = Chem.MolFromSmiles(smi)
        firstCopy = cPickle.loads(cPickle.dumps(mol))
        secondCopy = cPickle.loads(cPickle.dumps(firstCopy))
        oldSmi = Chem.MolToSmiles(firstCopy)
        newSmi = Chem.MolToSmiles(secondCopy)
        for copy in (firstCopy, secondCopy):
            assert copy.GetNumAtoms() == mol.GetNumAtoms(), "num atoms comparison failed"
        assert oldSmi == newSmi, "string compare failed: %s != %s" % (oldSmi, newSmi)
def testPkl2(self):
    """ further pickle tests """
    # Double pickle round trip over self.bigSmiList: the atom count of each
    # copy must equal the source molecule's, and the two copies must produce
    # identical SMILES.
    for smi in self.bigSmiList:
        src = Chem.MolFromSmiles(smi)
        once = cPickle.loads(cPickle.dumps(src))
        twice = cPickle.loads(cPickle.dumps(once))
        oldSmi = Chem.MolToSmiles(once)
        newSmi = Chem.MolToSmiles(twice)
        assert once.GetNumAtoms() == src.GetNumAtoms(), 'num atoms comparison failed'
        assert twice.GetNumAtoms() == src.GetNumAtoms(), 'num atoms comparison failed'
        assert oldSmi == newSmi, 'string compare failed: %s != %s' % (oldSmi, newSmi)
def DBToData(dbName, tableName, user='******', password='******', dupCol=-1, what='*', where='',
             join='', pickleCol=-1, pickleClass=None, ensembleIds=None):
    """ pulls rows out of a database and wraps them in an _MLData.MLDataSet_

      **Arguments**

        - dbName: the name of the database to be opened

        - tableName: the table name containing the data in the database

        - user: the user name to be used to connect to the database

        - password: the password to be used to connect to the database

        - dupCol: forwarded to the query as _removeDups_

        - what: fields to select; also forwarded to the column-name lookup

        - where: where clause for the query

        - join: join clause for the query and the column-name lookup

        - pickleCol: if >=0, index (after the first column, the point name,
          has been popped) of a column whose contents are decoded

        - pickleClass: optional callable used to decode the pickle column;
          the first time it raises, decoding switches to cPickle for that row
          and every later row

        - ensembleIds: if provided, each row is reduced with
          BitUtils.ConstructEnsembleBV (pickle column present) or
          TakeEnsemble (no pickle column)

      **Returns**

         an _MLData.MLDataSet_

      **Notes**

        - this uses Dbase.DataUtils functionality

    """
    conn = DbConnect(dbName, tableName, user, password)
    res = conn.GetData(fields=what, where=where, join=join, removeDups=dupCol, forceList=1)
    ptNames = []
    vals = []
    tryClass = True
    hasPickle = pickleCol >= 0
    for rawRow in res:
        entry = list(rawRow)
        ptNames.append(entry.pop(0))
        if hasPickle:
            if pickleClass and tryClass:
                try:
                    entry[pickleCol] = pickleClass(str(entry[pickleCol]))
                except Exception:
                    # fall back to a plain unpickle, permanently
                    entry[pickleCol] = cPickle.loads(str(entry[pickleCol]))
                    tryClass = False
            else:
                entry[pickleCol] = cPickle.loads(str(entry[pickleCol]))
            if ensembleIds:
                entry[pickleCol] = BitUtils.ConstructEnsembleBV(entry[pickleCol], ensembleIds)
        elif ensembleIds:
            entry = TakeEnsemble(entry, ensembleIds, isDataVect=True)
        vals.append(entry)
    varNames = conn.GetColumnNames(join=join, what=what)
    data = MLData.MLDataSet(vals, varNames=varNames, ptNames=ptNames)
    return data
def test2CatStringPickle(self):
    # Fill the catalog from self.smiList2, then verify that it survives a
    # pickle round trip, first with the default protocol ("non-binary" in
    # the original notes) and then with protocol 1 ("binary").
    self._fillCat(self.smiList2)
    for dumpArgs in ((), (1, )):
        clone = cPickle.loads(cPickle.dumps(self.fragCat, *dumpArgs))
        assert clone.GetNumEntries() == 21
        assert clone.GetFPLength() == 21
        self._testBits(clone)
def test2CatStringPickle(self):
    # The filled fragment catalog must keep its 21 entries / 21 fingerprint
    # bits after pickling with the default protocol and with protocol 1.
    self._fillCat(self.smiList2)

    def verify(pickled):
        restored = cPickle.loads(pickled)
        assert restored.GetNumEntries() == 21
        assert restored.GetFPLength() == 21
        self._testBits(restored)

    # test non-binary pickle:
    verify(cPickle.dumps(self.fragCat))
    # test binary pickle:
    verify(cPickle.dumps(self.fragCat, 1))
def test4Serialize(self):
    # Build a fragment catalog from the molecules in self.smiName, pickle it,
    # and check that the restored catalog has the same size and produces the
    # same fingerprints as the original for every input molecule.
    #
    # The previous version also read the whole SMILES file into a list that
    # was never used and kept the unused return value of AddFragsFromMol;
    # both have been dropped.
    fparams = FragmentCatalog.FragCatParams(1, 6, self.fName)
    fcat = FragmentCatalog.FragCatalog(fparams)
    fgen = FragmentCatalog.FragCatGenerator()
    suppl = Chem.SmilesMolSupplier(self.smiName, " ", 0, 1, 0)
    smiles = []
    for mol in suppl:
        fgen.AddFragsFromMol(mol, fcat)
        smiles.append(Chem.MolToSmiles(mol))
    self.assertEqual(fcat.GetNumEntries(), 21)
    self.assertEqual(fcat.GetFPLength(), 21)

    pkl = cPickle.dumps(fcat)
    fcat2 = cPickle.loads(pkl)
    self.assertEqual(fcat2.GetNumEntries(), 21)
    self.assertEqual(fcat2.GetFPLength(), 21)

    fpgen = FragmentCatalog.FragFPGenerator()
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        fp1 = fpgen.GetFPForMol(mol, fcat)
        fp2 = fpgen.GetFPForMol(mol, fcat2)
        self.assertEqual(fp1.GetNumOnBits(), fp2.GetNumOnBits())
        self.assertEqual(tuple(fp1.GetOnBits()), tuple(fp2.GetOnBits()))
def testPkl1(self):
    " testing single molecule pickle "
    # The SMILES written from a pickled copy must equal the SMILES written
    # from the source molecule.
    mol = Chem.MolFromSmiles('CCOC')
    outS = Chem.MolToSmiles(mol)
    restored = cPickle.loads(cPickle.dumps(mol))
    outS2 = Chem.MolToSmiles(restored)
    assert outS == outS2, "bad pickle: %s != %s" % (outS, outS2)
def next(self):
    """ returns the next row of the cursor

      Iteration stops (StopIteration) when the cursor is missing, closed,
      has no connection or result, or when a fetch returns no tuples.

      While self._first is true the row comes from cursor.fetchone() and
      self._first is then cleared; afterwards rows are pulled one at a time
      with a 'fetch 1' query on the named cursor.

      If self._depickle is set, the value in column self._pickleCol is turned
      into an object (through self._klass when set, otherwise cPickle) and
      the remaining columns are attached to it as _fieldsFromDb; otherwise
      the row is returned as a list.

      Raises ValueError if a fetched result has fewer than two fields.
    """
    curs = self.cursor
    exhausted = (not curs or curs.closed or curs.conn is None or curs.res is None or
                 (curs.res.resultType != sql.RESULT_DQL and curs.closed is None))
    if exhausted:
        raise StopIteration

    if self._first:
        row = curs.fetchone()
        val = str(row[self._pickleCol])
        self._first = 0
    else:
        res = curs.conn.conn.query('fetch 1 from "%s"' % self.cursor.name)
        if res.ntuples == 0:
            raise StopIteration
        if res.nfields < 2:
            raise ValueError('bad result: %s' % str(res))
        row = [res.getvalue(0, x) for x in range(res.nfields)]
        val = row[self._pickleCol]

    if not self._depickle:
        return list(row)

    if self._klass:
        fp = self._klass(val)
    else:
        fp = cPickle.loads(val)
    fields = list(row)
    del fields[self._pickleCol]
    fp._fieldsFromDb = fields
    return fp
def testPkl1(self):
    " testing single molecule pickle "
    # Pickle one molecule and compare the SMILES before and after.
    source = Chem.MolFromSmiles("CCOC")
    clone = cPickle.loads(cPickle.dumps(source))
    outS, outS2 = Chem.MolToSmiles(source), Chem.MolToSmiles(clone)
    assert outS == outS2, "bad pickle: %s != %s" % (outS, outS2)
def next(self):
    """ returns the next row of the cursor

      Raises StopIteration when the cursor is missing, closed, has no
      connection or result, or when a fetch comes back with no tuples.

      The very first row (self._first true) is read with cursor.fetchone();
      later rows are read one at a time with a 'fetch 1' query against the
      named cursor.

      With self._depickle set, column self._pickleCol is converted to an
      object (self._klass if provided, otherwise cPickle.loads) and the other
      columns are stored on it as _fieldsFromDb.  Otherwise the row itself is
      returned as a list.

      Raises ValueError if a fetched result has fewer than two fields.
    """
    curs = self.cursor
    if not curs or \
       curs.closed or \
       curs.conn is None or \
       curs.res is None or \
       (curs.res.resultType != sql.RESULT_DQL and curs.closed is None):
        raise StopIteration
    if not self._first:
        res = curs.conn.conn.query('fetch 1 from "%s"' % self.cursor.name)
        if res.ntuples == 0:
            raise StopIteration
        if res.nfields < 2:
            # call form of raise: the old "raise ValueError, msg" spelling is
            # a syntax error on Python 3
            raise ValueError('bad result: %s' % str(res))
        t = [res.getvalue(0, x) for x in range(res.nfields)]
        val = t[self._pickleCol]
    else:
        t = curs.fetchone()
        val = str(t[self._pickleCol])
        self._first = 0
    if self._depickle:
        if not self._klass:
            fp = cPickle.loads(val)
        else:
            fp = self._klass(val)
        fields = list(t)
        del fields[self._pickleCol]
        fp._fieldsFromDb = fields
    else:
        fp = list(t)
    return fp
def __getitem__(self, idx):
    """ returns row idx of the query result

      On first use the command is executed: the first row is read with
      fetchone() and kept in self._first, the rest are retrieved with a
      'fetch all' query, so self.rowCount is ntuples+1 and row idx>0 lives at
      position idx-1 of self.res.  Negative indices count from the end.

      With self._depickle set, column self._pickleCol is unpickled and the
      remaining columns are attached to the result as _fieldsFromDb;
      otherwise the row is returned as a list.

      Raises ValueError if the result has fewer than two fields and
      IndexError if idx is out of range.  Unpickling failures are logged and
      re-raised.
    """
    if self.res is None:
        self.cursor.execute(self.cmd)
        self._first = self.cursor.fetchone()
        self._validate()
        self.res = self.cursor.conn.conn.query('fetch all from "%s"' % self.cursor.name)
        self.rowCount = self.res.ntuples + 1
        self.idx = 0
    if self.res.nfields < 2:
        # previously: Python-2-only raise syntax, a format string without a
        # placeholder and a reference to an undefined name
        raise ValueError('bad query result: %s' % str(self.res))

    if idx < 0:
        idx = self.rowCount + idx
    if idx < 0 or idx >= self.rowCount:
        raise IndexError(idx)

    if idx == 0:
        val = str(self._first[self._pickleCol])
        t = list(self._first)
    else:
        # use the requested index; the old code read self.idx-1, which does
        # not depend on the argument at all
        row = idx - 1
        val = self.res.getvalue(row, self._pickleCol)
        t = [self.res.getvalue(row, x) for x in range(self.res.nfields)]

    if self._depickle:
        try:
            # NOTE(review): unpickling database content is only safe if the
            # database is trusted
            fp = cPickle.loads(val)
        except Exception:
            import logging
            del t[self._pickleCol]
            logging.exception('Depickling failure in row: %s' % str(t))
            raise
        del t[self._pickleCol]
        fp._fieldsFromDb = t
    else:
        fp = t
    return fp
def test3Pickle2(self):
    """ """
    # An IntSparseIntVect must compare equal to itself, to a pickled copy,
    # to a copy rebuilt from ToBinary(), and to the stored reference pickle.
    size = 1 << 21
    v1 = ds.IntSparseIntVect(size)
    self.assertRaises(IndexError, lambda: v1[size + 1])
    for pos, count in ((0, 1), (2, 2), (1 << 12, 3)):
        v1[pos] = count
    self.assertTrue(v1 == v1)

    v2 = cPickle.loads(cPickle.dumps(v1))
    self.assertTrue(v2 == v1)

    v3 = ds.IntSparseIntVect(v2.ToBinary())
    self.assertTrue(v2 == v3)
    self.assertTrue(v1 == v3)

    # the reference file can be regenerated with:
    #   cPickle.dump(v1,file('isiv.pkl','wb+'))
    refPath = os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/isiv.pkl')
    with open(refPath, 'rb') as f:
        v3 = cPickle.load(f)
        self.assertTrue(v3 == v1)
def test3Pickle2(self):
    """ """
    # An IntSparseIntVect must compare equal to itself, to a pickled copy,
    # to a copy rebuilt from ToBinary(), and to the stored reference pickle.
    l = 1 << 21
    v1 = ds.IntSparseIntVect(l)
    self.assertRaises(IndexError, lambda: v1[l + 1])
    v1[0] = 1
    v1[2] = 2
    v1[1 << 12] = 3
    self.assertTrue(v1 == v1)

    v2 = cPickle.loads(cPickle.dumps(v1))
    self.assertTrue(v2 == v1)

    v3 = ds.IntSparseIntVect(v2.ToBinary())
    self.assertTrue(v2 == v3)
    self.assertTrue(v1 == v3)

    # the reference file can be regenerated with:
    #   cPickle.dump(v1,file('isiv.pkl','wb+'))
    # The file is read as text so that '\r\n' line endings can be normalised
    # before unpickling.  The with-block closes the file; the explicit
    # close() that used to follow the read was redundant.
    with open(os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/isiv.pkl'),
              'r') as tf:
        buf = tf.read().replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as f:
        v3 = cPickle.load(f)
    self.assertTrue(v3 == v1)
def test3Pickle2(self):
    """ """
    # Checks equality of an IntSparseIntVect with its pickled copy, with a
    # copy rebuilt from ToBinary(), and with the stored reference pickle.
    l = 1 << 21
    v1 = ds.IntSparseIntVect(l)
    self.assertRaises(IndexError, lambda: v1[l + 1])
    v1[0] = 1
    v1[2] = 2
    v1[1 << 12] = 3
    self.assertTrue(v1 == v1)

    v2 = cPickle.loads(cPickle.dumps(v1))
    self.assertTrue(v2 == v1)

    v3 = ds.IntSparseIntVect(v2.ToBinary())
    self.assertTrue(v2 == v3)
    self.assertTrue(v1 == v3)

    # the reference file can be regenerated with:
    #   cPickle.dump(v1,file('isiv.pkl','wb+'))
    # Read in text mode so '\r\n' line endings can be normalised before
    # unpickling; the with-block already closes the file, so no explicit
    # close() is needed.
    refPath = os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/isiv.pkl')
    with open(refPath, 'r') as tf:
        buf = tf.read().replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as f:
        v3 = cPickle.load(f)
    self.assertTrue(v3 == v1)
def testPickle(self):
    # A FreeChemicalFeature must survive an in-memory pickle round trip and
    # must still be loadable from the two stored reference pickles.  The
    # reference files are now opened in with-blocks; they used to be opened
    # and never closed.
    ffeat = ChemicalFeatures.FreeChemicalFeature("HBondDonor", "HBondDonor1",
                                                 geom.Point3D(1.0, 2.0, 3.0), 123)

    def compare(other, checkId=True):
        if checkId:
            self.assertTrue(other.GetId() == ffeat.GetId())
        self.assertTrue(other.GetFamily() == ffeat.GetFamily())
        self.assertTrue(other.GetType() == ffeat.GetType())
        self.assertTrue(ptFeq(other.GetPos(), ffeat.GetPos()))

    def loadReference(relPath):
        with open(os.path.join(RDConfig.RDBaseDir, relPath), 'rb') as inF:
            return cPickle.load(inF, encoding='bytes')

    pkl = cPickle.dumps(ffeat)
    compare(cPickle.loads(pkl, encoding='bytes'))

    # Check that the old pickled versions have not been broken
    # this version (1.0) does not have an id in the byte stream
    compare(loadReference('Code/ChemicalFeatures/Wrap/testData/feat.pkl'), checkId=False)

    # Test the new version also has the id and works as expected.
    # To generate (overwrite) a new version of the pickled data file:
    #   cPickle.dump(ffeat,file(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),'wb+'))
    compare(loadReference('Code/ChemicalFeatures/Wrap/testData/featv2.pkl'))
def testPickle(self):
    # Pickle round trip of a FreeChemicalFeature plus backwards-compatibility
    # checks against two stored pickles.  Each reference file is read inside
    # a with-block so the handle is closed (it was leaked before).
    ffeat = ChemicalFeatures.FreeChemicalFeature("HBondDonor", "HBondDonor1",
                                                 geom.Point3D(1.0, 2.0, 3.0), 123)
    pkl = cPickle.dumps(ffeat)
    ffeat2 = cPickle.loads(pkl, encoding='bytes')
    self.assertTrue(ffeat2.GetId() == ffeat.GetId())
    self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
    self.assertTrue(ffeat2.GetType() == ffeat.GetType())
    self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))

    # Check that the old pickled versions have not been broken
    with open(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/feat.pkl'),
              'rb') as inF:
        ffeat2 = cPickle.load(inF, encoding='bytes')
    # this version (1.0) does not have an id in the byte stream
    self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
    self.assertTrue(ffeat2.GetType() == ffeat.GetType())
    self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))

    # Test the new version also has the id and works as expected.
    # To generate (overwrite) a new version of the pickled data file:
    #   cPickle.dump(ffeat,file(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),'wb+'))
    with open(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),
              'rb') as inF:
        ffeat2 = cPickle.load(inF, encoding='bytes')
    self.assertTrue(ffeat2.GetId() == ffeat.GetId())
    self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
    self.assertTrue(ffeat2.GetType() == ffeat.GetType())
    self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))
def test3Pickle2(self):
    """ """
    # Equality checks between an IntSparseIntVect, its pickled copy, a copy
    # built from its binary form, and the stored reference pickle.
    length = 1 << 21
    v1 = ds.IntSparseIntVect(length)
    self.assertRaises(IndexError, lambda: v1[length + 1])
    contents = ((0, 1), (2, 2), (1 << 12, 3))
    for where, howMany in contents:
        v1[where] = howMany
    self.assertTrue(v1 == v1)

    v2 = cPickle.loads(cPickle.dumps(v1))
    self.assertTrue(v2 == v1)

    v3 = ds.IntSparseIntVect(v2.ToBinary())
    self.assertTrue(v2 == v3)
    self.assertTrue(v1 == v3)

    # the reference file can be regenerated with:
    #   cPickle.dump(v1,file('isiv.pkl','wb+'))
    with open(os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/isiv.pkl'),
              'rb') as f:
        v3 = cPickle.load(f)
        self.assertTrue(v3 == v1)
def testMQNDetails(self):
    # Regression test: the MQN descriptors of every molecule in
    # aromat_regress.txt must equal the stored reference values.  Which
    # reference file applies depends on the rotatable-bond definition in use.
    #
    # Changes from the previous version: the redundant close() inside the
    # with-block and the BytesIO round trip are gone, and the refData2 list,
    # which was filled but never read, has been removed.
    refFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'MQNs_regress.pkl')
    refFile2 = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                            'MQNs_non_strict_regress.pkl')
    # figure out which definition we are currently using
    m = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
    if Lipinski.NumRotatableBonds(m) == 2:
        refFile = refFile2

    # read as text so '\r\n' line endings can be normalised before unpickling
    with open(refFile, 'r') as intf:
        pkl = intf.read().replace('\r\n', '\n').encode('utf-8')
    refData = cPickle.loads(pkl, encoding='bytes')

    fn = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'aromat_regress.txt')
    ms = [x for x in Chem.SmilesMolSupplier(fn, delimiter='\t')]
    for i, m in enumerate(ms):
        mqns = rdMolDescriptors.MQNs_(m)
        if mqns != refData[i][1]:
            # show which descriptor positions differ before failing
            indices = [(j, x, y) for j, (x, y) in enumerate(zip(mqns, refData[i][1])) if x != y]
            print(i, Chem.MolToSmiles(m), indices)
        self.assertEqual(mqns, refData[i][1])
def _testPkl10(self):
    " testing 5k molecule pickles "
    # Each molecule of NCI/first_5K.smi is pickled twice in a row; atom
    # counts must be preserved and both copies must give the same non-empty
    # SMILES.  The input file is read inside a with-block (the handle used
    # to be leaked).
    with open('%s/NCI/first_5K.smi' % (RDConfig.RDDataDir), 'r') as inF:
        smis = [line.split('\t')[0] for line in inF]
    for smi in smis:
        m = Chem.MolFromSmiles(smi)
        newM1 = cPickle.loads(cPickle.dumps(m))
        newSmi1 = Chem.MolToSmiles(newM1)
        newM2 = cPickle.loads(cPickle.dumps(newM1))
        newSmi2 = Chem.MolToSmiles(newM2)
        assert newM1.GetNumAtoms() == m.GetNumAtoms(), 'num atoms comparison failed'
        assert newM2.GetNumAtoms() == m.GetNumAtoms(), 'num atoms comparison failed'
        assert len(newSmi1) > 0, 'empty smi1'
        assert len(newSmi2) > 0, 'empty smi2'
        assert newSmi1 == newSmi2, 'string compare failed:\n%s\n\t!=\n%s\norig smiles:\n%s' % (
            newSmi1, newSmi2, smi)
def getFingerprints(conn):
    """ returns a dictionary mapping molecule name to unpickled fingerprint

      The rows come from the mol_name and fingerprint columns of the
      'signatures' table, retrieved through conn.GetData().

      pickle.loads() needs a bytes-like object on Python 3, so the stored
      value is converted with care: bytes pass through untouched,
      bytearray/memoryview values are copied to bytes, and anything else is
      stringified as before and, if that yields text, encoded as latin-1.

      NOTE(review): unpickling executes arbitrary code; this is only safe if
      the database contents are trusted.
    """
    data = conn.GetData(table='signatures', fields='mol_name,fingerprint')
    fpMap = {}
    for dat in data:
        pkl = dat[1]
        if isinstance(pkl, (bytearray, memoryview)):
            pkl = bytes(pkl)
        elif not isinstance(pkl, bytes):
            pkl = str(pkl)
            if not isinstance(pkl, bytes):
                pkl = pkl.encode('latin1')
        fpMap[dat[0]] = pickle.loads(pkl)
    return fpMap
def getFingerprints(conn):
    """ builds a {mol_name: fingerprint} dictionary from the 'signatures' table

      Rows are read with conn.GetData(); the second column of each row holds
      a pickled fingerprint.

      The pickled value is normalised to bytes before unpickling, because
      pickle.loads() rejects text on Python 3: bytes are used as they are,
      bytearray/memoryview values are copied to bytes, and any other value
      is passed through str() (the historical behaviour) and encoded as
      latin-1 if the result is text.

      NOTE(review): unpickling executes arbitrary code; only use this with a
      trusted database.
    """
    data = conn.GetData(table='signatures', fields='mol_name,fingerprint')
    fpMap = {}
    for dat in data:
        raw = dat[1]
        if isinstance(raw, bytes):
            pkl = raw
        elif isinstance(raw, (bytearray, memoryview)):
            pkl = bytes(raw)
        else:
            pkl = str(raw)
            if not isinstance(pkl, bytes):
                pkl = pkl.encode('latin1')
        fpMap[dat[0]] = pickle.loads(pkl)
    return fpMap
def test6PickleEquals(self):
    " testing pickled tree equals "
    # A pickled copy must equal the base tree, and must stop being equal
    # once the first child of the base tree has been pruned.
    self._readyTree()
    oTree = cPickle.loads(cPickle.dumps(self.baseTree))
    assert oTree == self.baseTree, 'Pickle inequality test failed'

    firstChild = self.baseTree.GetChildren()[0]
    self.baseTree.PruneChild(firstChild)
    assert oTree != self.baseTree, 'Pickle inequality test failed (bad Tree.__cmp__)'
def test6PickleEquals(self):
    " testing pickled tree equals "
    # Round-trip the base tree through cPickle, compare, then prune the
    # first child of the base tree and check the two now differ.
    self._readyTree()
    serialized = cPickle.dumps(self.baseTree)
    oTree = cPickle.loads(serialized)
    assert oTree == self.baseTree, 'Pickle inequality test failed'
    children = self.baseTree.GetChildren()
    self.baseTree.PruneChild(children[0])
    assert oTree != self.baseTree, 'Pickle inequality test failed (bad Tree.__cmp__)'
def DepickleFP(pkl, similarityMethod):
    """ turns stored fingerprint data back into a fingerprint object

      **Arguments**

        - pkl: the stored data; anything that is not bytes or str is
          converted with str() first

        - similarityMethod: key into the module-level _similarityMethods_
          table, selecting the class used to construct the fingerprint

      **Returns**

        the constructed fingerprint.  If looking up or calling the class
        raises, the traceback is printed and the data is unpickled with
        cPickle instead.
    """
    payload = pkl if isinstance(pkl, (bytes, str)) else str(pkl)
    try:
        fp = similarityMethods[similarityMethod](payload)
    except Exception:
        import traceback
        traceback.print_exc()
        fp = cPickle.loads(payload)
    return fp
def test2ExplicitPickle(self):
    # Set 1000 randomly chosen bits in a 10000-bit ExplicitBitVect, pickle
    # it with protocol 1 and compare every bit with the restored vector.
    nbits = 10000
    bv1 = DataStructs.ExplicitBitVect(nbits)
    for _ in range(1000):
        bv1.SetBit(random.randrange(0, nbits))

    bv2 = pickle.loads(pickle.dumps(bv1, 1))
    for pos in range(nbits):
        assert bv1[pos] == bv2[pos]
def DepickleFP(pkl, similarityMethod):
    """ turns stored fingerprint data back into a fingerprint object

      **Arguments**

        - pkl: the stored data; anything that is not bytes or str is
          converted with str() first

        - similarityMethod: key into the module-level _similarityMethods_
          table, selecting the class used to construct the fingerprint

      **Returns**

        the constructed fingerprint.  If looking up or calling the class
        raises, the traceback is printed and the data is unpickled with
        cPickle instead.

      **Notes**

        - only Exception subclasses trigger the fallback; KeyboardInterrupt
          and SystemExit now propagate (they were swallowed by a bare except)

    """
    if not isinstance(pkl, (bytes, str)):
        pkl = str(pkl)
    try:
        klass = similarityMethods[similarityMethod]
        fp = klass(pkl)
    except Exception:
        import traceback
        traceback.print_exc()
        fp = cPickle.loads(pkl)
    return fp
def GetComposites(details):
    """ loads the composite models described by _details_

      **Arguments**

        - details: an object providing persistTblName, inNote, dbName and
          composFileName attributes

      **Returns**

        a list of unpickled models:

          - if both persistTblName and inNote are set, one model for every
            row of that table whose note equals inNote

          - otherwise, if composFileName is set, the single model stored in
            that file

          - otherwise an empty list

      **Notes**

        - the composite file is now closed after loading (the handle used to
          be leaked)

    """
    res = []
    if details.persistTblName and details.inNote:
        conn = DbConnect(details.dbName, details.persistTblName)
        # NOTE(review): inNote is interpolated directly into the where
        # clause; make sure it never carries untrusted text
        mdls = conn.GetData(fields='MODEL', where="where note='%s'" % (details.inNote))
        for row in mdls:
            rawD = row[0]
            # NOTE(review): unpickling is only safe for trusted data
            res.append(cPickle.loads(str(rawD)))
    elif details.composFileName:
        with open(details.composFileName, 'rb') as inF:
            res.append(cPickle.load(inF))
    return res
def testMQNDetails(self):
    # Regression test: the MQN descriptors computed for every molecule in
    # aromat_regress.txt must equal the values stored in MQNs_regress.pkl.
    refFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'MQNs_regress.pkl')
    with open(refFile, 'rb') as inf:
        refData = cPickle.loads(inf.read(), encoding='bytes')

    fn = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'aromat_regress.txt')
    mols = [mol for mol in Chem.SmilesMolSupplier(fn, delimiter='\t')]
    for i, mol in enumerate(mols):
        expected = refData[i][1]
        mqns = rdMolDescriptors.MQNs_(mol)
        if mqns != expected:
            # report the positions that differ before the assertion fails
            indices = [(j, x, y) for j, (x, y) in enumerate(zip(mqns, expected)) if x != y]
            print(Chem.MolToSmiles(mol), indices)
        self.assertEqual(mqns, expected)
def setUp(self):
    # Prepares the 'problematic' PubChem data set: a molecule supplier over
    # the gzipped SD file and the matching unpickled InChI reference data.
    self.dataset = dict()
    self.dataset_inchi = dict()
    # the gzip handle is deliberately left open: it is handed to the
    # supplier, which keeps reading from it
    inf = gzip.open(
        os.path.join(RDConfig.RDCodeDir, 'Chem/test_data', 'pubchem-hard-set.sdf.gz'), 'r')
    self.dataset['problematic'] = ForwardSDMolSupplier(inf, sanitize=False, removeHs=False)
    # Read as text so '\r\n' line endings can be normalised before
    # unpickling.  The with-block closes the file; the extra close() and the
    # BytesIO round trip that used to follow were redundant.
    with open(os.path.join(RDConfig.RDCodeDir, 'Chem/test_data', 'pubchem-hard-set.inchi'),
              'r') as intF:
        pkl = intF.read().replace('\r\n', '\n').encode('latin1')
    self.dataset_inchi['problematic'] = loads(pkl, encoding='latin1')
    # disable logging
    DisableLog('rdApp.warning')
def test3Pickles(self):
    # A RealValueVect must survive an in-memory pickle round trip and must
    # match the vector stored in the reference file rvvs.pkl.
    #
    # To regenerate the reference file:
    #   outF = file('../testData/rvvs.pkl','wb+')
    #   cPickle.dump(v1,outF)
    with open(os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/rvvs.pkl'),
              'rb') as inF:
        v1 = ds.RealValueVect(30)
        for i in range(15):
            v1[2 * i] = 1.3
        v2 = cPickle.loads(cPickle.dumps(v1))
        self.assertAlmostEqual(ds.ComputeL1Norm(v1, v2), 0)
        v2 = cPickle.load(inF, encoding='bytes')
        self.assertAlmostEqual(ds.ComputeL1Norm(v1, v2), 0)
        self.assertAlmostEqual(v1.GetTotalVal(), v2.GetTotalVal())
        # assertTrue replaces the failUnless alias, which no longer exists
        # in current unittest versions
        self.assertTrue(v2.GetTotalVal() != 0)
def testMQNDetails(self):
    # Regression test: the MQN descriptors computed for every molecule in
    # aromat_regress.txt must equal the values stored in MQNs_regress.pkl.
    refFile = os.path.join(RDConfig.RDCodeDir, "Chem", "test_data", "MQNs_regress.pkl")
    # Read as text so "\r\n" line endings can be normalised before
    # unpickling.  The with-block closes the file; the extra close() and the
    # BytesIO round trip that used to follow were redundant.
    with open(refFile, "r") as intf:
        pkl = intf.read().replace("\r\n", "\n").encode("utf-8")
    refData = cPickle.loads(pkl, encoding="bytes")

    fn = os.path.join(RDConfig.RDCodeDir, "Chem", "test_data", "aromat_regress.txt")
    ms = [x for x in Chem.SmilesMolSupplier(fn, delimiter="\t")]
    for i, m in enumerate(ms):
        mqns = rdMolDescriptors.MQNs_(m)
        if mqns != refData[i][1]:
            # report the positions that differ before the assertion fails
            indices = [(j, x, y) for j, (x, y) in enumerate(zip(mqns, refData[i][1])) if x != y]
            print(Chem.MolToSmiles(m), indices)
        self.assertEqual(mqns, refData[i][1])
def test4GridPickles(self):
    # A UniformGrid3D with four occupied spheres must keep its dimensions
    # and have zero Tanimoto distance to its pickled copy.

    def checkDims(grid):
        self.assertTrue(grid.GetNumX() == 20)
        self.assertTrue(grid.GetNumY() == 18)
        self.assertTrue(grid.GetNumZ() == 16)

    grd = geom.UniformGrid3D(10.0, 9.0, 8.0, 0.5)
    checkDims(grd)
    for cx, cy in ((-2.0, -2.0), (-2.0, 2.0), (2.0, -2.0), (2.0, 2.0)):
        grd.SetSphereOccupancy(geom.Point3D(cx, cy, 0.0), 1.5, 0.25)
    self.assertTrue(geom.TanimotoDistance(grd, grd) == 0.0)

    grd2 = cPickle.loads(cPickle.dumps(grd))
    checkDims(grd2)
    self.assertTrue(geom.TanimotoDistance(grd, grd2) == 0.0)
def test4GridPickles(self):
    # Pickle round trip for geom.UniformGrid3D: grid dimensions and
    # occupancy (via the Tanimoto distance) must be unchanged.
    grd = geom.UniformGrid3D(10.0, 9.0, 8.0, 0.5)
    self.assertTrue(grd.GetNumX() == 20)
    self.assertTrue(grd.GetNumY() == 18)
    self.assertTrue(grd.GetNumZ() == 16)

    centers = (
        geom.Point3D(-2.0, -2.0, 0.0),
        geom.Point3D(-2.0, 2.0, 0.0),
        geom.Point3D(2.0, -2.0, 0.0),
        geom.Point3D(2.0, 2.0, 0.0),
    )
    for center in centers:
        grd.SetSphereOccupancy(center, 1.5, 0.25)
    self.assertTrue(geom.TanimotoDistance(grd, grd) == 0.0)

    restored = cPickle.loads(cPickle.dumps(grd))
    self.assertTrue(restored.GetNumX() == 20)
    self.assertTrue(restored.GetNumY() == 18)
    self.assertTrue(restored.GetNumZ() == 16)
    self.assertTrue(geom.TanimotoDistance(grd, restored) == 0.0)
def test12Pickles(self):
    # A reaction must give the same products after a cPickle round trip and
    # after being rebuilt from its binary form.

    def checkProducts(products):
        self.assertEqual(len(products), 1)
        for p in products:
            self.assertEqual(len(p), 1)
            self.assertEqual(p[0].GetNumAtoms(), 3)
            self.assertEqual(p[0].GetNumBonds(), 2)

    rxn = rdChemReactions.ReactionFromSmarts('[C:1]1[O:2][N:3]1>>[C:1]1[O:2].[N:3]1')
    rxn = cPickle.loads(cPickle.dumps(rxn))
    mol = Chem.MolFromSmiles('C1ON1')
    checkProducts(rxn.RunReactants([mol]))

    rxn = rdChemReactions.ChemicalReaction(rxn.ToBinary())
    checkProducts(rxn.RunReactants([mol]))
def setUp(self):
    # Loads the 'problematic' PubChem set: a lazy supplier over the gzipped
    # SD file plus the unpickled InChI reference data that goes with it.
    self.dataset = dict()
    self.dataset_inchi = dict()
    sdfName = os.path.join(RDConfig.RDCodeDir, 'Chem/test_data', 'pubchem-hard-set.sdf.gz')
    # not closed here on purpose: the supplier reads from this handle
    inf = gzip.open(sdfName, 'r')
    self.dataset['problematic'] = ForwardSDMolSupplier(inf, sanitize=False, removeHs=False)

    inchiName = os.path.join(RDConfig.RDCodeDir, 'Chem/test_data', 'pubchem-hard-set.inchi')
    # Text mode so '\r\n' line endings can be normalised before unpickling;
    # the with-block takes care of closing the file (the explicit close()
    # and the BytesIO wrapper were redundant).
    with open(inchiName, 'r') as intF:
        pkl = intF.read().replace('\r\n', '\n').encode('latin1')
    self.dataset_inchi['problematic'] = loads(pkl, encoding='latin1')
    # disable logging
    DisableLog('rdApp.warning')
def _BuildFp(self, data):
    """ builds a fingerprint from one database row

      The value in column self.fpCol is converted to bytes (Latin1) on
      Python 3 or to str otherwise, removed from the row, and turned into a
      fingerprint: unpickled if self._usePickles is set, otherwise passed to
      DataStructs.ExplicitBitVect.  self._numProcessed is incremented for
      every row.

      Returns the fingerprint with the remaining columns attached as
      _fieldsFromDb, or None if construction failed (the traceback is
      printed).
    """
    fields = list(data)
    pkl = bytes(fields[self.fpCol], encoding='Latin1') if six.PY3 else str(fields[self.fpCol])
    del fields[self.fpCol]
    self._numProcessed += 1
    try:
        if self._usePickles:
            newFp = cPickle.loads(pkl, encoding='bytes')
        else:
            newFp = DataStructs.ExplicitBitVect(pkl)
    except Exception:
        import traceback
        traceback.print_exc()
        newFp = None
    if newFp:
        newFp._fieldsFromDb = fields
    return newFp
def _BuildFp(self, data):
    """ builds a fingerprint from one database row

      The value in column self.fpCol is converted to bytes (Latin1) on
      Python 3 or to str otherwise, removed from the row, and turned into a
      fingerprint: unpickled if self._usePickles is set, otherwise passed to
      DataStructs.ExplicitBitVect.  self._numProcessed is incremented for
      every row.

      Returns the fingerprint with the remaining columns attached as
      _fieldsFromDb, or None if construction raised an Exception (the
      traceback is printed).  KeyboardInterrupt and SystemExit are no longer
      swallowed: the handler used to be a bare except.
    """
    data = list(data)
    if six.PY3:
        pkl = bytes(data[self.fpCol], encoding='Latin1')
    else:
        pkl = str(data[self.fpCol])
    del data[self.fpCol]
    self._numProcessed += 1
    try:
        if self._usePickles:
            newFp = cPickle.loads(pkl, encoding='bytes')
        else:
            newFp = DataStructs.ExplicitBitVect(pkl)
    except Exception:
        import traceback
        traceback.print_exc()
        newFp = None
    if newFp:
        newFp._fieldsFromDb = data
    return newFp
def test1(self):
    # Build a three-entry MolCatalog with three edges and check that it
    # still has three entries after a cPickle round trip.
    cat = MolCatalog.CreateMolCatalog()
    es = []
    for smi in ('C1CCC1OC', 'C1CCC1', 'C'):
        m = Chem.MolFromSmiles(smi)
        entry = MolCatalog.MolCatalogEntry()
        entry.SetMol(m)
        self.assertTrue(entry.GetMol())
        eSmi = Chem.MolToSmiles(entry.GetMol())
        self.assertTrue(eSmi == Chem.MolToSmiles(m))
        entry.SetDescription(smi)
        self.assertTrue(entry.GetDescription() == smi)
        es.append(entry)

    for expectedIdx, entry in enumerate(es):
        v = cat.AddEntry(entry)
        self.assertTrue(v == expectedIdx)
        self.assertTrue(cat.GetNumEntries() == expectedIdx + 1)

    for begin, end in ((0, 1), (0, 2), (1, 2)):
        cat.AddEdge(begin, end)

    d = cPickle.dumps(cat)
    # drop every reference to the originals before restoring
    es = None
    entry = None
    cat = None
    cat = cPickle.loads(d)
    self.assertTrue(cat.GetNumEntries() == 3)
    cat = None
def test1(self):
    # MolCatalog construction and pickling: entries keep their molecule and
    # description, AddEntry returns consecutive indices, and the pickled
    # catalog restores with all three entries.
    smilesList = ('C1CCC1OC', 'C1CCC1', 'C')
    cat = MolCatalog.CreateMolCatalog()
    es = []
    for smi in smilesList:
        m = Chem.MolFromSmiles(smi)
        entry = MolCatalog.MolCatalogEntry()
        entry.SetMol(m)
        self.assertTrue(entry.GetMol())
        eSmi = Chem.MolToSmiles(entry.GetMol())
        self.assertTrue(eSmi == Chem.MolToSmiles(m))
        entry.SetDescription(smi)
        self.assertTrue(entry.GetDescription() == smi)
        es.append(entry)

    position = 0
    while position < 3:
        v = cat.AddEntry(es[position])
        self.assertTrue(v == position)
        position += 1
        self.assertTrue(cat.GetNumEntries() == position)

    cat.AddEdge(0, 1)
    cat.AddEdge(0, 2)
    cat.AddEdge(1, 2)

    d = cPickle.dumps(cat)
    # release the originals before loading the copy
    es = None
    entry = None
    cat = None
    cat = cPickle.loads(d)
    self.assertTrue(cat.GetNumEntries() == 3)
    cat = None
def __getitem__(self, idx):
    """ returns row idx of the query result

      On first use the command is executed: the first row is read with
      fetchone() and kept in self._first, the rest are retrieved with a
      'fetch all' query, so self.rowCount is ntuples+1 and row idx>0 lives at
      position idx-1 of self.res.  Negative indices count from the end.

      With self._depickle set, column self._pickleCol is unpickled and the
      remaining columns are attached to the result as _fieldsFromDb;
      otherwise the row is returned as a list.

      Raises ValueError if the result has fewer than two fields and
      IndexError if idx is out of range.  Unpickling failures are logged and
      re-raised.
    """
    if self.res is None:
        self.cursor.execute(self.cmd)
        self._first = self.cursor.fetchone()
        self._validate()
        self.res = self.cursor.conn.conn.query('fetch all from "%s"' % self.cursor.name)
        self.rowCount = self.res.ntuples + 1
        self.idx = 0
    if self.res.nfields < 2:
        # the old message had no %s placeholder and referenced an undefined
        # name, so this line itself failed instead of reporting the problem
        raise ValueError('bad query result: %s' % str(self.res))

    if idx < 0:
        idx = self.rowCount + idx
    if idx < 0 or idx >= self.rowCount:
        raise IndexError(idx)

    if idx == 0:
        val = str(self._first[self._pickleCol])
        t = list(self._first)
    else:
        # use the requested index; the old code read self.idx-1, which does
        # not depend on the argument at all
        row = idx - 1
        val = self.res.getvalue(row, self._pickleCol)
        t = [self.res.getvalue(row, x) for x in range(self.res.nfields)]

    if self._depickle:
        try:
            # NOTE(review): unpickling database content is only safe if the
            # database is trusted
            fp = cPickle.loads(val)
        except Exception:
            import logging
            del t[self._pickleCol]
            logging.exception('Depickling failure in row: %s' % str(t))
            raise
        del t[self._pickleCol]
        fp._fieldsFromDb = t
    else:
        fp = t
    return fp
def test12Pickles(self):
    # 08/05/14
    # This test is changed due to a new behavior of the smarts
    # reaction parser which now allows using parenthesis in products
    # as well. original smiles: '[C:1]1[O:2][N:3]1>>[C:1]1[O:2].[N:3]1'
    #
    # The reaction must give the same single product (3 atoms, 2 bonds)
    # after a cPickle round trip and after being rebuilt from ToBinary().

    def verify(reaction, reactant):
        products = reaction.RunReactants([reactant])
        self.assertEqual(len(products), 1)
        for p in products:
            self.assertEqual(len(p), 1)
            self.assertEqual(p[0].GetNumAtoms(), 3)
            self.assertEqual(p[0].GetNumBonds(), 2)

    rxn = rdChemReactions.ReactionFromSmarts('[C:1]1[O:2][N:3]1>>([C:1]1[O:2].[N:3]1)')
    pkl = cPickle.dumps(rxn)
    rxn = cPickle.loads(pkl)
    mol = Chem.MolFromSmiles('C1ON1')
    verify(rxn, mol)

    rxn = rdChemReactions.ChemicalReaction(rxn.ToBinary())
    verify(rxn, mol)
def testMQNDetails(self):
    # Regression test for the MQN descriptors of the molecules in
    # aromat_regress.txt.  The reference file is picked according to the
    # rotatable-bond definition currently in effect.
    #
    # Cleaned up relative to the previous version: no explicit close()
    # inside the with-block, no BytesIO round trip, and no refData2 list
    # (it was appended to but never read).
    dataDir = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data')
    refFile = os.path.join(dataDir, 'MQNs_regress.pkl')
    refFile2 = os.path.join(dataDir, 'MQNs_non_strict_regress.pkl')
    # figure out which definition we are currently using
    m = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
    if Lipinski.NumRotatableBonds(m) == 2:
        refFile = refFile2

    # read as text so '\r\n' line endings can be normalised before unpickling
    with open(refFile, 'r') as intf:
        pkl = intf.read().replace('\r\n', '\n').encode('utf-8')
    refData = cPickle.loads(pkl, encoding='bytes')

    fn = os.path.join(dataDir, 'aromat_regress.txt')
    ms = [x for x in Chem.SmilesMolSupplier(fn, delimiter='\t')]
    for i, m in enumerate(ms):
        mqns = rdMolDescriptors.MQNs_(m)
        if mqns != refData[i][1]:
            # show which descriptor positions differ before failing
            indices = [(j, x, y) for j, (x, y) in enumerate(zip(mqns, refData[i][1])) if x != y]
            print(i, Chem.MolToSmiles(m), indices)
        self.assertEqual(mqns, refData[i][1])
if details.persistTblName: conn = DbConnect(details.dbName, details.persistTblName) message('-> Retrieving models from database') curs = conn.GetCursor() curs.execute("select model from %s where note='%s'" % (details.persistTblName, details.note)) message('-> Reconstructing models') try: blob = curs.fetchone() except Exception: blob = None while blob: message(' Building model %d' % len(models)) blob = blob[0] try: models.append(cPickle.loads(str(blob))) except Exception: import traceback traceback.print_exc() print('Model failed') else: message(' <-Done') try: blob = curs.fetchone() except Exception: blob = None curs = None else: for modelName in extras: try: model = cPickle.load(open(modelName, 'rb'))
composites = [] if db is None: for arg in extras: composite = cPickle.load(open(arg, 'rb')) composites.append(composite) else: tbl = extras[0] conn = DbConnect(db, tbl) if note: where = "where note='%s'" % (note) else: where = '' if not skip: pkls = conn.GetData(fields='model', where=where) composites = [] for pkl in pkls: pkl = str(pkl[0]) comp = cPickle.loads(pkl) composites.append(comp) if len(composites): ProcessIt(composites, count, verbose=verbose) elif not skip: print('ERROR: no composite models found') sys.exit(-1) if db: res = ErrorStats(conn, where, enrich=enrich) if res: ShowStats(res)
def test1cPointND(self):
    # Exercises geom.PointND: indexing (including negative indices),
    # arithmetic with and without assignment, scaling, length and
    # normalisation, and finally a pickle round trip.

    def expect(vec, *pairs):
        # each pair is (index, expected value)
        for where, ref in pairs:
            self.assertTrue(feq(vec[where], ref))

    dim = 4
    pt = geom.PointND(4)
    for i in range(dim):
        self.assertTrue(feq(pt[i], 0.0))

    pt[0] = 3
    pt[3] = 4
    expect(pt, (0, 3.0), (3, 4.0), (-4, 3.0), (-1, 4.0))

    lst = list(pt)
    expect(lst, (0, 3.0), (3, 4.0))

    pt2 = geom.PointND(4)
    pt2[0] = 1.
    pt2[2] = 1.

    pt3 = pt + pt2
    expect(pt3, (0, 4.0), (2, 1.0), (3, 4.0))

    pt += pt2
    expect(pt, (0, 4.0), (2, 1.0), (3, 4.0))

    pt3 = pt - pt2
    expect(pt3, (0, 3.0), (2, 0.0), (3, 4.0))

    pt -= pt2
    expect(pt, (0, 3.0), (2, 0.0), (3, 4.0))

    pt *= 2.0
    expect(pt, (0, 6.0), (1, 0.0), (2, 0.0), (3, 8.0))

    pt /= 2
    expect(pt, (0, 3.0), (1, 0.0), (2, 0.0), (3, 4.0))

    self.assertTrue(feq(pt.Length(), 5.0))
    self.assertTrue(feq(pt.LengthSq(), 25.0))
    pt.Normalize()
    self.assertTrue(feq(pt.Length(), 1.0))

    pkl = cPickle.dumps(pt)
    pt2 = cPickle.loads(pkl)
    self.assertTrue(len(pt) == len(pt2))
    for i in range(len(pt)):
        self.assertTrue(feq(pt2[i], pt[i]))
def test3Pickles(self):
    # For every DiscreteValueVect value type: build a vector, check that it
    # survives an in-memory pickle round trip, and compare it with the next
    # vector stored in the reference file dvvs.pkl (the vectors are stored
    # in the order of the cases below).
    #
    # To regenerate the reference file, open outF = file('dvvs.pkl','wb+')
    # and call pickle.dump(v1,outF) for each case in turn.
    #
    # The five copy-pasted stanzas are now one table-driven loop, and the
    # redundant close() inside the with-block is gone.
    refPath = os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/dvvs.pkl')
    # read as text so '\r\n' line endings can be normalised before unpickling
    with open(refPath, 'r') as inTF:
        buf = inTF.read().replace('\r\n', '\n').encode('utf-8')

    valueTypes = ds.DiscreteValueType
    # (value type, vector length, (index, value) pairs to set)
    cases = (
        (valueTypes.ONEBITVALUE, 30, [(2 * i, 1) for i in range(15)]),
        (valueTypes.TWOBITVALUE, 30, [(i, i % 4) for i in range(30)]),
        (valueTypes.FOURBITVALUE, 16, [(i, i % 16) for i in range(16)]),
        (valueTypes.EIGHTBITVALUE, 5, list(enumerate((34, 167, 3, 56, 128)))),
        (valueTypes.SIXTEENBITVALUE, 3, list(enumerate((2345, 64578, 34)))),
    )
    with io.BytesIO(buf) as inF:
        for valueType, size, entries in cases:
            v1 = ds.DiscreteValueVect(valueType, size)
            for idx, val in entries:
                v1[idx] = val

            v2 = pickle.loads(pickle.dumps(v1))
            self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0)

            v2 = pickle.load(inF, encoding='bytes')
            self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0)
            self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal())
            self.assertTrue(v2.GetTotalVal() != 0)
def test1cPointND(self):
    # geom.PointND checks: element access, +, +=, -, -=, *=, /=, Length,
    # LengthSq, Normalize and pickling.

    def check(vec, expected):
        # expected is a sequence of (index, value) pairs, tested in order
        for position, value in expected:
            self.assertTrue(feq(vec[position], value))

    dim = 4
    pt = geom.PointND(4)
    check(pt, [(i, 0.0) for i in range(dim)])

    pt[0] = 3
    pt[3] = 4
    check(pt, [(0, 3.0), (3, 4.0), (-4, 3.0), (-1, 4.0)])
    lst = list(pt)
    check(lst, [(0, 3.0), (3, 4.0)])

    pt2 = geom.PointND(4)
    pt2[0] = 1.
    pt2[2] = 1.

    summed = [(0, 4.0), (2, 1.0), (3, 4.0)]
    restored = [(0, 3.0), (2, 0.0), (3, 4.0)]

    pt3 = pt + pt2
    check(pt3, summed)
    pt += pt2
    check(pt, summed)

    pt3 = pt - pt2
    check(pt3, restored)
    pt -= pt2
    check(pt, restored)

    pt *= 2.0
    check(pt, [(0, 6.0), (1, 0.0), (2, 0.0), (3, 8.0)])
    pt /= 2
    check(pt, [(0, 3.0), (1, 0.0), (2, 0.0), (3, 4.0)])

    self.assertTrue(feq(pt.Length(), 5.0))
    self.assertTrue(feq(pt.LengthSq(), 25.0))
    pt.Normalize()
    self.assertTrue(feq(pt.Length(), 1.0))

    pt2 = cPickle.loads(cPickle.dumps(pt))
    self.assertTrue(len(pt) == len(pt2))
    for i in range(len(pt)):
        self.assertTrue(feq(pt2[i], pt[i]))
qs = [] smas = [] for line in file(RDConfig.RDDataDir + '/SmartsLib/RLewis_smarts.txt', 'r').readlines(): if line[0] == '#': continue line = line.split(' ') p = Chem.MolFromSmarts(line[0]) if not p: print(line[0], file=sys.stderr) continue smas.append(line[0]) qs.append(p) logger.info('reading target counts') refFps = cPickle.loads(gzip.open('fps.1000.counts.pkl.gz', 'rb').read()) fps = [] logger.info('reading mols:') ms = cPickle.loads(gzip.open('mols.1000.pkl.gz', 'rb').read()) t1 = time.time() nFail = 0 for i, m in enumerate(ms): fp = [0] * len(qs) for j, q in enumerate(qs): o = m.GetSubstructMatches(q) if len(o) != refFps[i][j]: print(' >', i, j, o, refFps[i][j], Chem.MolToSmiles(m), smas[j]) nFail += 1 if nFail == 10: raise ValueError