Пример #1
0
    def testPointPickles(self):
        """Check that Point3D and Point2D coordinates survive a pickle round trip."""

        def checkRoundTrip(point, axes):
            # pickle/unpickle the point and compare each named coordinate
            clone = cPickle.loads(cPickle.dumps(point))
            for axis in axes:
                self.assertTrue(feq(getattr(point, axis), getattr(clone, axis), 1e-6))

        checkRoundTrip(geom.Point3D(2.0, -3.0, 1.0), ('x', 'y', 'z'))
        checkRoundTrip(geom.Point2D(2.0, -4.0), ('x', 'y'))
Пример #2
0
 def testPkl2(self):
     """ pickle each molecule from self.bigSmiList twice and compare the copies """
     for smiles in self.bigSmiList:
         mol = Chem.MolFromSmiles(smiles)
         firstCopy = cPickle.loads(cPickle.dumps(mol))
         secondCopy = cPickle.loads(cPickle.dumps(firstCopy))
         firstSmi = Chem.MolToSmiles(firstCopy)
         secondSmi = Chem.MolToSmiles(secondCopy)
         # both generations of the pickle must keep the atom count of the source
         for copy in (firstCopy, secondCopy):
             assert copy.GetNumAtoms() == mol.GetNumAtoms(), "num atoms comparison failed"
         assert firstSmi == secondSmi, "string compare failed: %s != %s" % (firstSmi, secondSmi)
Пример #3
0
  def test2CatStringPickle(self):
    """Pickle the filled fragment catalog with two dumps() variants and re-check it."""
    self._fillCat(self.smiList2)

    # first pass: dumps() with its default protocol; second pass: protocol 1
    for extraArgs in ((), (1,)):
      restored = cPickle.loads(cPickle.dumps(self.fragCat, *extraArgs))
      assert restored.GetNumEntries()==21
      assert restored.GetFPLength()==21
      self._testBits(restored)
Пример #4
0
    def testPickle(self):
      """Pickle a FreeChemicalFeature and also load the stored reference pickles.

      Three sources are compared against a freshly built feature: an in-memory
      dumps/loads round trip, the feat.pkl file (whose stream carries no id, so
      the id is not compared) and the featv2.pkl file (id compared as well).
      """
      ffeat = ChemicalFeatures.FreeChemicalFeature("HBondDonor", "HBondDonor1", geom.Point3D(1.0, 2.0, 3.0),123)
      pkl = cPickle.dumps(ffeat)
      ffeat2 = cPickle.loads(pkl, encoding='bytes')
      self.assertTrue(ffeat2.GetId()==ffeat.GetId())
      self.assertTrue(ffeat2.GetFamily()==ffeat.GetFamily())
      self.assertTrue(ffeat2.GetType()==ffeat.GetType())
      self.assertTrue(ptFeq(ffeat2.GetPos(),ffeat.GetPos()))

      # Check that the old pickled versions have not been broken.
      # The context manager guarantees the handle is closed even if load() raises.
      with open(os.path.join(RDConfig.RDBaseDir,
                             'Code/ChemicalFeatures/Wrap/testData/feat.pkl'),'rb') as inF:
        ffeat2=cPickle.load(inF, encoding='bytes')
      # this version (1.0) does not have an id in the byte stream
      self.assertTrue(ffeat2.GetFamily()==ffeat.GetFamily())
      self.assertTrue(ffeat2.GetType()==ffeat.GetType())
      self.assertTrue(ptFeq(ffeat2.GetPos(),ffeat.GetPos()))

      # Test the new version also has the id and works as expected.
      # To regenerate (overwrite) the reference file, dump `ffeat` with
      # cPickle.dump() into testData/featv2.pkl opened in binary write mode.
      with open(os.path.join(RDConfig.RDBaseDir,
                             'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),'rb') as inF:
        ffeat2=cPickle.load(inF, encoding='bytes')
      self.assertTrue(ffeat2.GetId()==ffeat.GetId())
      self.assertTrue(ffeat2.GetFamily()==ffeat.GetFamily())
      self.assertTrue(ffeat2.GetType()==ffeat.GetType())
      self.assertTrue(ptFeq(ffeat2.GetPos(),ffeat.GetPos()))
Пример #5
0
 def test4Serialize(self):
   """Pickle a populated fragment catalog and compare fingerprints from both copies.

   The catalog is filled from the molecules in self.smiName, pickled and
   restored; every molecule must then yield the same on-bits from the
   original and from the restored catalog.
   """
   fparams = FragmentCatalog.FragCatParams(1, 6, self.fName)
   fcat = FragmentCatalog.FragCatalog(fparams)
   fgen = FragmentCatalog.FragCatGenerator()
   suppl = Chem.SmilesMolSupplier(self.smiName, " ", 0, 1, 0)
   smiles = []
   for mol in suppl:
     fgen.AddFragsFromMol(mol, fcat)
     smiles.append(Chem.MolToSmiles(mol))
   self.assertEqual(fcat.GetNumEntries(), 21)
   self.assertEqual(fcat.GetFPLength(), 21)
   pkl = cPickle.dumps(fcat)
   fcat2 = cPickle.loads(pkl)
   self.assertEqual(fcat2.GetNumEntries(), 21)
   self.assertEqual(fcat2.GetFPLength(), 21)
   fpgen = FragmentCatalog.FragFPGenerator()
   for smi in smiles:
     mol = Chem.MolFromSmiles(smi)
     fp1 = fpgen.GetFPForMol(mol, fcat)
     fp2 = fpgen.GetFPForMol(mol, fcat2)
     self.assertEqual(fp1.GetNumOnBits(), fp2.GetNumOnBits())
     self.assertEqual(tuple(fp1.GetOnBits()), tuple(fp2.GetOnBits()))
Пример #6
0
  def test3Pickle2(self):
    """Compare an IntSparseIntVect with its pickled, ToBinary() and on-disk copies.

    The reference pickle is read in text mode and CRLF sequences are turned
    into LF before unpickling (presumably to cope with line-ending conversion
    of the checked-in file -- confirm).
    """
    size = 1<<21
    v1 = ds.IntSparseIntVect(size)
    # indexing past the end must raise
    self.assertRaises(IndexError,lambda:v1[size+1])
    v1[0]=1
    v1[2]=2
    v1[1<<12]=3
    self.assertTrue(v1==v1)

    v2 = cPickle.loads(cPickle.dumps(v1))
    self.assertTrue(v2==v1)

    v3 = ds.IntSparseIntVect(v2.ToBinary())
    self.assertTrue(v2==v3)
    self.assertTrue(v1==v3)

    # To regenerate the reference file, cPickle.dump(v1, ...) into isiv.pkl.
    with open(
      os.path.join(RDConfig.RDBaseDir,
                   'Code/DataStructs/Wrap/testData/isiv.pkl'),
      'r'
      ) as tf:
      # the with-block closes tf; no explicit close() is needed
      buf = tf.read().replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as f:
      v3 = cPickle.load(f)
      self.assertTrue(v3==v1)
Пример #7
0
  def test3Pickle2(self):
    """
    Compare an IntSparseIntVect with its pickled, ToBinary() and on-disk copies.
    """
    vectLen = 1 << 21
    orig = ds.IntSparseIntVect(vectLen)
    # reading past the end of the vector has to raise
    self.assertRaises(IndexError, lambda: orig[vectLen + 1])
    for idx, val in ((0, 1), (2, 2), (1 << 12, 3)):
      orig[idx] = val
    self.assertTrue(orig == orig)

    unpickled = cPickle.loads(cPickle.dumps(orig))
    self.assertTrue(unpickled == orig)

    rebuilt = ds.IntSparseIntVect(unpickled.ToBinary())
    self.assertTrue(unpickled == rebuilt)
    self.assertTrue(orig == rebuilt)

    # reference pickle stored with the test data
    pklPath = os.path.join(RDConfig.RDBaseDir,
                           'Code/DataStructs/Wrap/testData/isiv.pkl')
    with open(pklPath, 'rb') as inF:
      rebuilt = cPickle.load(inF)
      self.assertTrue(rebuilt == orig)
Пример #8
0
 def testPkl1(self):
     " single molecule pickle round trip "
     mol = Chem.MolFromSmiles("CCOC")
     before = Chem.MolToSmiles(mol)
     clone = cPickle.loads(cPickle.dumps(mol))
     after = Chem.MolToSmiles(clone)
     # the SMILES written from the copy must match the one from the source
     assert before == after, "bad pickle: %s != %s" % (before, after)
Пример #9
0
 def _testPkl10(self):
   """ testing 5k molecule pickles

   Reads the first tab-separated field of every line of NCI/first_5K.smi
   under RDConfig.RDDataDir as a SMILES, pickles each molecule twice and
   compares atom counts and the SMILES of the two copies.
   """
   # use a context manager so the data file is closed once it has been read
   with open('%s/NCI/first_5K.smi'%(RDConfig.RDDataDir),'r') as inF:
     smis = [line.split('\t')[0] for line in inF]
   for smi in smis:
     m = Chem.MolFromSmiles(smi)
     newM1 = cPickle.loads(cPickle.dumps(m))
     newSmi1 = Chem.MolToSmiles(newM1)
     newM2 = cPickle.loads(cPickle.dumps(newM1))
     newSmi2 = Chem.MolToSmiles(newM2)
     assert newM1.GetNumAtoms()==m.GetNumAtoms(),'num atoms comparison failed'
     assert newM2.GetNumAtoms()==m.GetNumAtoms(),'num atoms comparison failed'
     assert len(newSmi1)>0,'empty smi1'
     assert len(newSmi2)>0,'empty smi2'
     assert newSmi1==newSmi2,'string compare failed:\n%s\n\t!=\n%s\norig smiles:\n%s'%(newSmi1,newSmi2,smi)
Пример #10
0
  def test6PickleEquals(self):
    " a pickled copy compares equal to the tree until the tree is pruned "
    self._readyTree()
    restored = cPickle.loads(cPickle.dumps(self.baseTree))

    assert restored == self.baseTree, 'Pickle inequality test failed'

    # remove the first child from the original only; the copy must now differ
    firstChild = self.baseTree.GetChildren()[0]
    self.baseTree.PruneChild(firstChild)
    assert restored != self.baseTree, 'Pickle inequality test failed (bad Tree.__cmp__)'
Пример #11
0
  def test6PickleEquals(self):
    " equality of a tree and its pickled copy, before and after pruning "
    self._readyTree()
    serialized = cPickle.dumps(self.baseTree)
    treeCopy = cPickle.loads(serialized)

    assert treeCopy == self.baseTree, 'Pickle inequality test failed'

    # prune the original's first child so that it no longer matches the copy
    children = self.baseTree.GetChildren()
    self.baseTree.PruneChild(children[0])
    assert treeCopy != self.baseTree, 'Pickle inequality test failed (bad Tree.__cmp__)'
Пример #12
0
    def test2ExplicitPickle(self):
        """Set bits at 1000 random draws in an ExplicitBitVect and compare a pickled copy."""
        bitCount = 10000
        source = DataStructs.ExplicitBitVect(bitCount)
        for _ in range(1000):
            source.SetBit(random.randrange(0, bitCount))

        # protocol 1 is requested explicitly
        clone = pickle.loads(pickle.dumps(source, 1))
        for pos in range(bitCount):
            assert source[pos] == clone[pos]
Пример #13
0
  def test2ExplicitPickle(self):
    """Compare every bit of a randomly filled ExplicitBitVect with its pickled copy."""
    vectSize = 10000
    original = DataStructs.ExplicitBitVect(vectSize)
    drawsLeft = 1000
    while drawsLeft > 0:
      original.SetBit(random.randrange(0, vectSize))
      drawsLeft -= 1

    payload = pickle.dumps(original, 1)
    restored = pickle.loads(payload)
    for bitIdx in range(vectSize):
      assert original[bitIdx] == restored[bitIdx]
Пример #14
0
 def test3Pickles(self):
   """Check a RealValueVect against an in-memory pickle and the stored rvvs.pkl.

   A 30-element vector with 1.3 at every even index is compared (L1 norm of
   the difference) with its pickled copy and with the reference pickle
   shipped in the test data; the stored vector must also have a non-zero
   total value.
   """
   v1 = ds.RealValueVect(30)
   for i in range(15):
     v1[2*i] = 1.3
   v2 = cPickle.loads(cPickle.dumps(v1))
   self.assertAlmostEqual(ds.ComputeL1Norm(v1, v2), 0)

   # To regenerate the reference file, cPickle.dump(v1, ...) into
   # testData/rvvs.pkl opened in binary write mode.
   with open(os.path.join(RDConfig.RDBaseDir,
                          'Code/DataStructs/Wrap/testData/rvvs.pkl'),
             'rb') as inF:
     v2 = cPickle.load(inF, encoding='bytes')
   self.assertAlmostEqual(ds.ComputeL1Norm(v1, v2), 0)
   self.assertAlmostEqual(v1.GetTotalVal(), v2.GetTotalVal())
   # assertTrue replaces the failUnless alias, which no longer exists in
   # current unittest releases
   self.assertTrue(v2.GetTotalVal()!=0)
Пример #15
0
    def test4GridPickles(self):
        """Pickle a UniformGrid3D holding four occupied spheres and compare the copy."""

        def checkDims(grid):
            # expected number of grid points along each axis
            self.assertTrue(grid.GetNumX() == 20)
            self.assertTrue(grid.GetNumY() == 18)
            self.assertTrue(grid.GetNumZ() == 16)

        grd = geom.UniformGrid3D(10.0, 9.0, 8.0, 0.5)
        checkDims(grd)
        for cx, cy in ((-2.0, -2.0), (-2.0, 2.0), (2.0, -2.0), (2.0, 2.0)):
            grd.SetSphereOccupancy(geom.Point3D(cx, cy, 0.0), 1.5, 0.25)

        self.assertTrue(geom.TanimotoDistance(grd,grd)==0.0)

        grd2 = cPickle.loads(cPickle.dumps(grd))
        checkDims(grd2)
        self.assertTrue(geom.TanimotoDistance(grd,grd2)==0.0)
Пример #16
0
  def test4GridPickles(self):
    """Check grid dimensions and Tanimoto distance before and after pickling."""
    source = geom.UniformGrid3D(10.0, 9.0, 8.0, 0.5)
    self.assertTrue(source.GetNumX() == 20)
    self.assertTrue(source.GetNumY() == 18)
    self.assertTrue(source.GetNumZ() == 16)
    # one sphere per (+/-2, +/-2, 0) corner, all with the same remaining arguments
    for xPos in (-2.0, 2.0):
      for yPos in (-2.0, 2.0):
        source.SetSphereOccupancy(geom.Point3D(xPos, yPos, 0.0), 1.5, 0.25)

    self.assertTrue(geom.TanimotoDistance(source, source) == 0.0)

    serialized = cPickle.dumps(source)
    restored = cPickle.loads(serialized)
    self.assertTrue(restored.GetNumX() == 20)
    self.assertTrue(restored.GetNumY() == 18)
    self.assertTrue(restored.GetNumZ() == 16)
    self.assertTrue(geom.TanimotoDistance(source, restored) == 0.0)
Пример #17
0
  def test12Pickles(self):
    """Run a reaction restored from a pickle and from ToBinary() on the same molecule."""

    def checkProducts(reaction, reactant):
      # a single product set holding one molecule with 3 atoms and 2 bonds
      productSets = reaction.RunReactants([reactant])
      self.assertEqual(len(productSets),1)
      for productSet in productSets:
        self.assertEqual(len(productSet),1)
        self.assertEqual(productSet[0].GetNumAtoms(),3)
        self.assertEqual(productSet[0].GetNumBonds(),2)

    rxn = rdChemReactions.ReactionFromSmarts('[C:1]1[O:2][N:3]1>>[C:1]1[O:2].[N:3]1')
    rxn = cPickle.loads(cPickle.dumps(rxn))
    mol = Chem.MolFromSmiles('C1ON1')
    checkProducts(rxn, mol)

    rxn = rdChemReactions.ChemicalReaction(rxn.ToBinary())
    checkProducts(rxn, mol)
Пример #18
0
    def testPickle(self):
        """Pickle a FreeChemicalFeature and also load the stored reference pickles.

        Three sources are compared against a freshly built feature: an
        in-memory dumps/loads round trip, the feat.pkl file (whose stream
        carries no id, so the id is not compared) and the featv2.pkl file
        (id compared as well).
        """

        def loadStoredFeature(fileName):
            # The reference files are read as text with CRLF turned into LF
            # before unpickling; the with-blocks close the handles even if
            # reading or unpickling raises.
            with open(
                    os.path.join(RDConfig.RDBaseDir,
                                 'Code/ChemicalFeatures/Wrap/testData/' + fileName), 'r') as inTF:
                buf = inTF.read().replace('\r\n', '\n').encode('utf-8')
            with io.BytesIO(buf) as inF:
                return cPickle.load(inF, encoding='bytes')

        ffeat = ChemicalFeatures.FreeChemicalFeature(
            "HBondDonor", "HBondDonor1", geom.Point3D(1.0, 2.0, 3.0), 123)
        pkl = cPickle.dumps(ffeat)
        ffeat2 = cPickle.loads(pkl, encoding='bytes')
        self.assertTrue(ffeat2.GetId() == ffeat.GetId())
        self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
        self.assertTrue(ffeat2.GetType() == ffeat.GetType())
        self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))

        # Check that the old pickled versions have not been broken
        ffeat2 = loadStoredFeature('feat.pkl')
        # this version (1.0) does not have an id in the byte stream
        self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
        self.assertTrue(ffeat2.GetType() == ffeat.GetType())
        self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))

        # Test the new version also has the id and works as expected.
        # To regenerate (overwrite) the reference file, dump `ffeat` with
        # cPickle.dump() into testData/featv2.pkl opened in binary write mode.
        ffeat2 = loadStoredFeature('featv2.pkl')
        self.assertTrue(ffeat2.GetId() == ffeat.GetId())
        self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
        self.assertTrue(ffeat2.GetType() == ffeat.GetType())
        self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))
Пример #19
0
  def test1(self):
    """Build a three-entry MolCatalog with edges and check it survives pickling."""
    cat = MolCatalog.CreateMolCatalog()
    es = []
    for smi in ('C1CCC1OC','C1CCC1','C'):
      m = Chem.MolFromSmiles(smi)
      entry = MolCatalog.MolCatalogEntry()
      entry.SetMol(m)
      self.assertTrue(entry.GetMol())
      self.assertTrue(Chem.MolToSmiles(entry.GetMol())==Chem.MolToSmiles(m))
      entry.SetDescription(smi)
      self.assertTrue(entry.GetDescription()==smi)
      es.append(entry)

    # each AddEntry call must return the next index and grow the catalog by one
    for pos in range(3):
      v=cat.AddEntry(es[pos])
      self.assertTrue(v==pos)
      self.assertTrue(cat.GetNumEntries()==pos+1)

    for begin,end in ((0,1),(0,2),(1,2)):
      cat.AddEdge(begin,end)

    d = cPickle.dumps(cat)
    # drop the references to the entries and the catalog before unpickling
    es = None
    entry = None
    cat=None

    cat = cPickle.loads(d)
    self.assertTrue(cat.GetNumEntries()==3)
    cat=None
Пример #20
0
  def test1(self):
    """Fill a MolCatalog with three entries, connect them, then pickle and reload it."""
    cat = MolCatalog.CreateMolCatalog()
    es = []
    for smi in ('C1CCC1OC', 'C1CCC1', 'C'):
      m = Chem.MolFromSmiles(smi)
      entry = MolCatalog.MolCatalogEntry()
      entry.SetMol(m)
      self.assertTrue(entry.GetMol())
      entrySmiles = Chem.MolToSmiles(entry.GetMol())
      self.assertTrue(entrySmiles == Chem.MolToSmiles(m))
      entry.SetDescription(smi)
      self.assertTrue(entry.GetDescription() == smi)
      es.append(entry)

    # entries are added in order; the returned index and the entry count are
    # checked after every insertion
    expectedIdx = 0
    while expectedIdx < 3:
      v = cat.AddEntry(es[expectedIdx])
      self.assertTrue(v == expectedIdx)
      expectedIdx += 1
      self.assertTrue(cat.GetNumEntries() == expectedIdx)

    for edge in ((0, 1), (0, 2), (1, 2)):
      cat.AddEdge(*edge)

    d = cPickle.dumps(cat)
    # release the entries and the catalog itself before restoring from the pickle
    es = None
    entry = None
    cat = None

    cat = cPickle.loads(d)
    self.assertTrue(cat.GetNumEntries() == 3)
    cat = None
Пример #21
0
  def test12Pickles(self):
    """Run a reaction restored from a pickle and from ToBinary() on the same molecule."""

    def checkProducts(reaction, reactant):
      # a single product set holding one molecule with 3 atoms and 2 bonds
      productSets = reaction.RunReactants([reactant])
      self.assertEqual(len(productSets),1)
      for productSet in productSets:
        self.assertEqual(len(productSet),1)
        self.assertEqual(productSet[0].GetNumAtoms(),3)
        self.assertEqual(productSet[0].GetNumBonds(),2)

    # 08/05/14: the test was changed because the smarts reaction parser now
    # also accepts parentheses in products.
    # original smiles: '[C:1]1[O:2][N:3]1>>[C:1]1[O:2].[N:3]1'
    rxn = rdChemReactions.ReactionFromSmarts('[C:1]1[O:2][N:3]1>>([C:1]1[O:2].[N:3]1)')
    rxn = cPickle.loads(cPickle.dumps(rxn))
    mol = Chem.MolFromSmiles('C1ON1')
    checkProducts(rxn, mol)

    rxn = rdChemReactions.ChemicalReaction(rxn.ToBinary())
    checkProducts(rxn, mol)
Пример #22
0
  def test12Pickles(self):
    """Check reaction products after a pickle round trip and after a ToBinary() rebuild."""
    # 08/05/14: changed to follow the smarts reaction parser, which now allows
    # parentheses in products too.
    # original smiles: '[C:1]1[O:2][N:3]1>>[C:1]1[O:2].[N:3]1'
    smarts = '[C:1]1[O:2][N:3]1>>([C:1]1[O:2].[N:3]1)'
    rxn = rdChemReactions.ReactionFromSmarts(smarts)
    serialized = cPickle.dumps(rxn)
    rxn = cPickle.loads(serialized)
    mol = Chem.MolFromSmiles('C1ON1')

    # pass 0 checks the unpickled reaction, pass 1 the one rebuilt from ToBinary()
    for passIdx in range(2):
      if passIdx == 1:
        rxn = rdChemReactions.ChemicalReaction(rxn.ToBinary())
      outcome = rxn.RunReactants([mol])
      self.assertEqual(len(outcome),1)
      for prodTuple in outcome:
        self.assertEqual(len(prodTuple),1)
        self.assertEqual(prodTuple[0].GetNumAtoms(),3)
        self.assertEqual(prodTuple[0].GetNumBonds(),2)
Пример #23
0
def RunOnData(details, data, progressCallback=None, saveIt=1, setDescNames=0):
  """Grow a composite model on `data` as configured by `details` and return it.

  The examples from `data.GetNamedData()` are optionally split into training
  and hold-out sets and optionally filtered, a composite is grown with the
  model type selected by the `details` flags (trees, sig trees, KNN, naive
  Bayes, or the neural-network fallback), error statistics are reported
  through `message`, and the composite is screened against the data.

  Arguments:
    - details: run-parameter object; its attributes (lockRandom, splitRun,
      filterFrac, nModels, useTrees, ...) drive every step below.  It is
      modified in place: `details.model` is set, and
      `details.internalHoldoutFrac` is zeroed when `nModels == 1`.
    - data: data set providing GetNamedData(), GetNVars(),
      GetNPossibleVals(), GetVarNames() and GetNPts().
    - progressCallback: forwarded to `composite.Grow` by the tree-based
      builders.
    - saveIt: if true, the composite is pickled to `details.outName`.
    - setDescNames: if true, the input order is taken from the data's
      variable names and the descriptor names from `details._descNames`;
      otherwise the data's variable names are used as descriptor names.

  Returns:
    the grown composite, with its stored model examples cleared.

  Note: screening results are written to the module-level `_runDetails`
  object, not to `details`.
  """
  if details.lockRandom:
    seed = details.randomSeed
  else:
    import random
    # integer bounds: float bounds such as 1e6 are rejected by newer Pythons
    seed = (random.randint(0, 1000000), random.randint(0, 1000000))
  DataUtils.InitRandomNumbers(seed)
  testExamples = []
  if details.shuffleActivities == 1:
    DataUtils.RandomizeActivities(data, shuffle=1, runDetails=details)
  elif details.randomActivities == 1:
    DataUtils.RandomizeActivities(data, shuffle=0, runDetails=details)

  namedExamples = data.GetNamedData()
  if details.splitRun == 1:
    trainIdx, testIdx = SplitData.SplitIndices(
      len(namedExamples), details.splitFrac, silent=not _verbose)

    trainExamples = [namedExamples[x] for x in trainIdx]
    testExamples = [namedExamples[x] for x in testIdx]
  else:
    testExamples = []
    testIdx = []
    trainIdx = list(range(len(namedExamples)))
    trainExamples = namedExamples

  if details.filterFrac != 0.0:
    # if we're doing quantization on the fly, we need to handle that here:
    if hasattr(details, 'activityBounds') and details.activityBounds:
      tExamples = []
      bounds = details.activityBounds
      for pt in trainExamples:
        # work on a copy so the original example keeps its raw activity
        pt = pt[:]
        act = pt[-1]
        placed = 0
        bound = 0
        # replace the activity with the index of the first bound above it
        while not placed and bound < len(bounds):
          if act < bounds[bound]:
            pt[-1] = bound
            placed = 1
          else:
            bound += 1
        if not placed:
          pt[-1] = bound
        tExamples.append(pt)
    else:
      bounds = None
      tExamples = trainExamples
    trainIdx, temp = DataUtils.FilterData(tExamples, details.filterVal, details.filterFrac, -1,
                                          indicesOnly=1)
    tmp = [trainExamples[x] for x in trainIdx]
    # examples rejected by the filter are added to the test set
    testExamples += [trainExamples[x] for x in temp]
    trainExamples = tmp

    counts = DataUtils.CountResults(trainExamples, bounds=bounds)
    # sorted() works on dict views, which have no in-place sort()
    ks = sorted(counts.keys())
    message('Result Counts in training set:')
    for k in ks:
      message(str((k, counts[k])))
    counts = DataUtils.CountResults(testExamples, bounds=bounds)
    ks = sorted(counts.keys())
    message('Result Counts in test set:')
    for k in ks:
      message(str((k, counts[k])))
  nExamples = len(trainExamples)
  message('Training with %d examples' % (nExamples))

  nVars = data.GetNVars()
  attrs = list(range(1, nVars + 1))
  nPossibleVals = data.GetNPossibleVals()
  # drop the attributes whose preceding nPossibleVals entry is -1
  for i in range(1, len(nPossibleVals)):
    if nPossibleVals[i - 1] == -1:
      attrs.remove(i)

  if details.pickleDataFileName != '':
    # the context manager closes the file even if one of the dumps fails
    with open(details.pickleDataFileName, 'wb+') as pickleDataFile:
      cPickle.dump(trainExamples, pickleDataFile)
      cPickle.dump(testExamples, pickleDataFile)

  if details.bayesModel:
    composite = BayesComposite.BayesComposite()
  else:
    composite = Composite.Composite()

  composite._randomSeed = seed
  composite._splitFrac = details.splitFrac
  composite._shuffleActivities = details.shuffleActivities
  composite._randomizeActivities = details.randomActivities

  if hasattr(details, 'filterFrac'):
    composite._filterFrac = details.filterFrac
  if hasattr(details, 'filterVal'):
    composite._filterVal = details.filterVal

  composite.SetModelFilterData(details.modelFilterFrac, details.modelFilterVal)

  composite.SetActivityQuantBounds(details.activityBounds)
  nPossibleVals = data.GetNPossibleVals()
  if details.activityBounds:
    nPossibleVals[-1] = len(details.activityBounds) + 1

  if setDescNames:
    composite.SetInputOrder(data.GetVarNames())
    composite.SetDescriptorNames(details._descNames)
  else:
    composite.SetDescriptorNames(data.GetVarNames())
  composite.SetActivityQuantBounds(details.activityBounds)
  if details.nModels == 1:
    details.internalHoldoutFrac = 0.0
  if details.useTrees:
    from rdkit.ML.DecTree import CrossValidate, PruneTree
    if details.qBounds != []:
      from rdkit.ML.DecTree import BuildQuantTree
      builder = BuildQuantTree.QuantTreeBoot
    else:
      from rdkit.ML.DecTree import ID3
      builder = ID3.ID3Boot
    driver = CrossValidate.CrossValidationDriver
    pruner = PruneTree.PruneTree

    composite.SetQuantBounds(details.qBounds)
    nPossibleVals = data.GetNPossibleVals()
    if details.activityBounds:
      nPossibleVals[-1] = len(details.activityBounds) + 1
    composite.Grow(
      trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver, pruner=pruner,
      nTries=details.nModels, pruneIt=details.pruneIt, lessGreedy=details.lessGreedy,
      needsQuantization=0, treeBuilder=builder, nQuantBounds=details.qBounds,
      startAt=details.startAt, maxDepth=details.limitDepth, progressCallback=progressCallback,
      holdOutFrac=details.internalHoldoutFrac, replacementSelection=details.replacementSelection,
      recycleVars=details.recycleVars, randomDescriptors=details.randomDescriptors,
      silent=not _verbose)

  elif details.useSigTrees:
    from rdkit.ML.DecTree import CrossValidate
    from rdkit.ML.DecTree import BuildSigTree
    builder = BuildSigTree.SigTreeBuilder
    driver = CrossValidate.CrossValidationDriver
    nPossibleVals = data.GetNPossibleVals()
    if details.activityBounds:
      nPossibleVals[-1] = len(details.activityBounds) + 1
    if hasattr(details, 'sigTreeBiasList'):
      biasList = details.sigTreeBiasList
    else:
      biasList = None
    if hasattr(details, 'useCMIM'):
      useCMIM = details.useCMIM
    else:
      useCMIM = 0
    if hasattr(details, 'allowCollections'):
      allowCollections = details.allowCollections
    else:
      allowCollections = False
    composite.Grow(
      trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver,
      nTries=details.nModels, needsQuantization=0, treeBuilder=builder, maxDepth=details.limitDepth,
      progressCallback=progressCallback, holdOutFrac=details.internalHoldoutFrac,
      replacementSelection=details.replacementSelection, recycleVars=details.recycleVars,
      randomDescriptors=details.randomDescriptors, biasList=biasList, useCMIM=useCMIM,
      allowCollection=allowCollections, silent=not _verbose)

  elif details.useKNN:
    from rdkit.ML.KNN import CrossValidate
    from rdkit.ML.KNN import DistFunctions

    driver = CrossValidate.CrossValidationDriver
    dfunc = ''
    if (details.knnDistFunc == "Euclidean"):
      dfunc = DistFunctions.EuclideanDist
    elif (details.knnDistFunc == "Tanimoto"):
      dfunc = DistFunctions.TanimotoDist
    else:
      assert 0, "Bad KNN distance metric value"

    composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver,
                   nTries=details.nModels, needsQuantization=0, numNeigh=details.knnNeighs,
                   holdOutFrac=details.internalHoldoutFrac, distFunc=dfunc)

  elif details.useNaiveBayes or details.useSigBayes:
    from rdkit.ML.NaiveBayes import CrossValidate
    driver = CrossValidate.CrossValidationDriver
    if not (hasattr(details, 'useSigBayes') and details.useSigBayes):
      composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver,
                     nTries=details.nModels, needsQuantization=0, nQuantBounds=details.qBounds,
                     holdOutFrac=details.internalHoldoutFrac,
                     replacementSelection=details.replacementSelection,
                     mEstimateVal=details.mEstimateVal, silent=not _verbose)
    else:
      if hasattr(details, 'useCMIM'):
        useCMIM = details.useCMIM
      else:
        useCMIM = 0

      composite.Grow(trainExamples, attrs, nPossibleVals=[0] + nPossibleVals, buildDriver=driver,
                     nTries=details.nModels, needsQuantization=0, nQuantBounds=details.qBounds,
                     mEstimateVal=details.mEstimateVal, useSigs=True, useCMIM=useCMIM,
                     holdOutFrac=details.internalHoldoutFrac,
                     replacementSelection=details.replacementSelection, silent=not _verbose)

    # #   elif details.useSVM:
    # #     from rdkit.ML.SVM import CrossValidate
    # #     driver = CrossValidate.CrossValidationDriver
    # #     composite.Grow(trainExamples, attrs, nPossibleVals=[0]+nPossibleVals,
    # #                    buildDriver=driver, nTries=details.nModels,
    # #                    needsQuantization=0,
    # #                    cost=details.svmCost,gamma=details.svmGamma,
    # #                    weights=details.svmWeights,degree=details.svmDegree,
    # #                    type=details.svmType,kernelType=details.svmKernel,
    # #                    coef0=details.svmCoeff,eps=details.svmEps,nu=details.svmNu,
    # #                    cache_size=details.svmCache,shrinking=details.svmShrink,
    # #                    dataType=details.svmDataType,
    # #                    holdOutFrac=details.internalHoldoutFrac,
    # #                    replacementSelection=details.replacementSelection,
    # #                    silent=not _verbose)

  else:
    from rdkit.ML.Neural import CrossValidate
    driver = CrossValidate.CrossValidationDriver
    composite.Grow(trainExamples, attrs, [0] + nPossibleVals, nTries=details.nModels,
                   buildDriver=driver, needsQuantization=0)

  composite.AverageErrors()
  composite.SortModels()
  modelList, counts, avgErrs = composite.GetAllData()
  counts = numpy.array(counts)
  avgErrs = numpy.array(avgErrs)
  composite._varNames = data.GetVarNames()

  for i in range(len(modelList)):
    modelList[i].NameModel(composite._varNames)

  # do final statistics
  weightedErrs = counts * avgErrs
  averageErr = sum(weightedErrs) / sum(counts)
  devs = (avgErrs - averageErr)
  devs = devs * counts
  # sqrt of the square yields the absolute weighted deviation
  devs = numpy.sqrt(devs * devs)
  avgDev = sum(devs) / sum(counts)
  message('# Overall Average Error: %%% 5.2f, Average Deviation: %%% 6.2f' %
          (100. * averageErr, 100. * avgDev))

  if details.bayesModel:
    composite.Train(trainExamples, verbose=0)

  # blow out the saved examples and then save the composite:
  composite.ClearModelExamples()
  if saveIt:
    composite.Pickle(details.outName)
  details.model = DbModule.binaryHolder(cPickle.dumps(composite))

  badExamples = []
  if not details.detailedRes and (not hasattr(details, 'noScreen') or not details.noScreen):
    if details.splitRun:
      message('Testing all hold-out examples')
      wrong = testall(composite, testExamples, badExamples)
      message('%d examples (%% %5.2f) were misclassified' % (len(wrong), 100. * float(len(wrong)) /
                                                             float(len(testExamples))))
      _runDetails.holdout_error = float(len(wrong)) / len(testExamples)
    else:
      message('Testing all examples')
      wrong = testall(composite, namedExamples, badExamples)
      message('%d examples (%% %5.2f) were misclassified' % (len(wrong), 100. * float(len(wrong)) /
                                                             float(len(namedExamples))))
      _runDetails.overall_error = float(len(wrong)) / len(namedExamples)

  if details.detailedRes:
    message('\nEntire data set:')
    resTup = ScreenComposite.ShowVoteResults(
      range(data.GetNPts()), data, composite, nPossibleVals[-1], details.threshold)
    nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup
    nPts = len(namedExamples)
    nClass = nGood + nBad
    _runDetails.overall_error = float(nBad) / nClass
    _runDetails.overall_correct_conf = avgGood
    _runDetails.overall_incorrect_conf = avgBad
    _runDetails.overall_result_matrix = repr(voteTab)
    # NOTE(review): if nClass counts a subset of the nPts points this
    # difference is never positive and the branch below cannot run; possibly
    # meant nPts - nClass -- confirm before changing.
    nRej = nClass - nPts
    if nRej > 0:
      _runDetails.overall_fraction_dropped = float(nRej) / nPts

    if details.splitRun:
      message('\nHold-out data:')
      resTup = ScreenComposite.ShowVoteResults(
        range(len(testExamples)), testExamples, composite, nPossibleVals[-1], details.threshold)
      nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup
      nPts = len(testExamples)
      nClass = nGood + nBad
      _runDetails.holdout_error = float(nBad) / nClass
      _runDetails.holdout_correct_conf = avgGood
      _runDetails.holdout_incorrect_conf = avgBad
      _runDetails.holdout_result_matrix = repr(voteTab)
      # NOTE(review): same sign question as for the entire data set above.
      nRej = nClass - nPts
      if nRej > 0:
        _runDetails.holdout_fraction_dropped = float(nRej) / nPts

  if details.persistTblName and details.dbName:
    message('Updating results table %s:%s' % (details.dbName, details.persistTblName))
    details.Store(db=details.dbName, table=details.persistTblName)

  if details.badName != '':
    # one line per misclassified example: the example, a tab, then its vote
    with open(details.badName, 'w+') as badFile:
      for i in range(len(badExamples)):
        ex = badExamples[i]
        vote = wrong[i]
        outStr = '%s\t%s\n' % (ex, vote)
        badFile.write(outStr)

  composite.ClearModelExamples()
  return composite
Пример #24
0
    def test1cPointND(self):
        """Exercise PointND indexing, arithmetic, length, normalization and pickling."""

        def expect(seq, pairs):
            # every (index, value) pair must match within feq's default tolerance
            for idx, val in pairs:
                self.assertTrue(feq(seq[idx], val))

        dim = 4
        pt = geom.PointND(4)
        expect(pt, [(i, 0.0) for i in range(dim)])

        pt[0] = 3
        pt[3] = 4
        # positive and negative indices address the same components
        expect(pt, ((0, 3.0), (3, 4.0), (-4, 3.0), (-1, 4.0)))
        expect(list(pt), ((0, 3.0), (3, 4.0)))

        other = geom.PointND(4)
        other[0] = 1.
        other[2] = 1.

        afterAdd = ((0, 4.0), (2, 1.0), (3, 4.0))
        combined = pt + other
        expect(combined, afterAdd)

        pt += other
        expect(pt, afterAdd)

        afterSub = ((0, 3.0), (2, 0.0), (3, 4.0))
        combined = pt - other
        expect(combined, afterSub)

        pt -= other
        expect(pt, afterSub)

        pt *= 2.0
        expect(pt, ((0, 6.0), (1, 0.0), (2, 0.0), (3, 8.0)))

        pt /= 2
        expect(pt, ((0, 3.0), (1, 0.0), (2, 0.0), (3, 4.0)))

        self.assertTrue(feq(pt.Length(), 5.0))
        self.assertTrue(feq(pt.LengthSq(), 25.0))
        pt.Normalize()
        self.assertTrue(feq(pt.Length(), 1.0))

        # the normalized point must come back unchanged from a pickle round trip
        other = cPickle.loads(cPickle.dumps(pt))
        self.assertTrue(len(pt) == len(other))
        for i in range(len(pt)):
            self.assertTrue(feq(other[i], pt[i]))
Пример #25
0
       if nWeights==1:
         outName = _runDetails.outName
         composites[0].Pickle(outName)
       else:
         for i in range(nWeights):
           weight = int(100*_runDetails.balWeight[i])
           model = composites[i]
           outName = '%s.%d.pkl'%(_runDetails.outName.split('.pkl')[0],weight)
           model.Pickle(outName)
     if _runDetails.persistTblName and _runDetails.dbName:
       message('Updating results table %s:%s'%(_runDetails.dbName,_runDetails.persistTblName))
       if(len(_runDetails.balWeight))>1:
         message('WARNING: updating results table with models having different weights')
       # save the composite
       for i in range(len(composites)):
         _runDetails.model = cPickle.dumps(composites[i])
         _runDetails.Store(db=_runDetails.dbName,table=_runDetails.persistTblName)
 elif nModels==1:
   composite = GrowIt(_runDetails,initModels[0],setDescNames=1)
   if _runDetails.balTable and _runDetails.balCnt:
     composites = BalanceComposite(_runDetails,composite)
   else:
     composites=[composite]
   for mdl in composites:
     mdl.ClearModelExamples()
   if _runDetails.outName:
     nWeights = len(_runDetails.balWeight)
     if nWeights==1:
       outName = _runDetails.outName
       composites[0].Pickle(outName)
     else:
Пример #26
0
def RunOnData(details, data, progressCallback=None, saveIt=1, setDescNames=0):
    """ Builds, evaluates and (optionally) saves a composite model for a data set.

    **Arguments**

      - details: the run-configuration object.  Among others, the attributes
        lockRandom/randomSeed, splitRun/splitFrac, filterFrac/filterVal,
        activityBounds, nModels, the model-type flags (useTrees, useSigTrees,
        useKNN, useNaiveBayes, useSigBayes, bayesModel), outName, badName,
        pickleDataFileName, persistTblName and dbName are read here.

      - data: the data set; must provide GetNPts(), GetNamedData(),
        GetNVars(), GetNPossibleVals() and GetVarNames().

      - progressCallback: forwarded to composite.Grow() for the tree and
        signature-tree builders.

      - saveIt: if true, the composite is pickled to details.outName.

      - setDescNames: if true, the composite's input order is taken from the
        data set's variable names and its descriptor names from
        details._descNames; otherwise the data set's variable names are used
        as descriptor names.

    **Returns**

      the composite model that was built (its stored examples are cleared).

    **Side effects**

      - details.model is set to the pickled composite and
        details.internalHoldoutFrac is zeroed when details.nModels == 1.
      - error statistics are recorded on the module-level _runDetails object.
      - depending on details, a pickle of the train/test examples, a file of
        misclassified examples and a database row may be written.

    """
    nExamples = data.GetNPts()
    if details.lockRandom:
        seed = details.randomSeed
    else:
        import random
        # integer bounds: float arguments to randint() are rejected by recent
        # Python versions
        seed = (random.randint(0, 1000000), random.randint(0, 1000000))
    DataUtils.InitRandomNumbers(seed)
    testExamples = []
    if details.shuffleActivities == 1:
        DataUtils.RandomizeActivities(data, shuffle=1, runDetails=details)
    elif details.randomActivities == 1:
        DataUtils.RandomizeActivities(data, shuffle=0, runDetails=details)

    namedExamples = data.GetNamedData()
    if details.splitRun == 1:
        trainIdx, testIdx = SplitData.SplitIndices(len(namedExamples),
                                                   details.splitFrac,
                                                   silent=not _verbose)

        trainExamples = [namedExamples[x] for x in trainIdx]
        testExamples = [namedExamples[x] for x in testIdx]
    else:
        testExamples = []
        testIdx = []
        trainIdx = list(range(len(namedExamples)))
        trainExamples = namedExamples

    if details.filterFrac != 0.0:
        # if we're doing quantization on the fly, we need to handle that here:
        if hasattr(details, 'activityBounds') and details.activityBounds:
            tExamples = []
            bounds = details.activityBounds
            for pt in trainExamples:
                # work on a copy so the original example keeps its raw activity
                pt = pt[:]
                act = pt[-1]
                placed = 0
                bound = 0
                # the activity is replaced by the index of the first bound
                # that exceeds it (or len(bounds) if none does)
                while not placed and bound < len(bounds):
                    if act < bounds[bound]:
                        pt[-1] = bound
                        placed = 1
                    else:
                        bound += 1
                if not placed:
                    pt[-1] = bound
                tExamples.append(pt)
        else:
            bounds = None
            tExamples = trainExamples
        trainIdx, temp = DataUtils.FilterData(tExamples,
                                              details.filterVal,
                                              details.filterFrac,
                                              -1,
                                              indicesOnly=1)
        # the examples that were filtered out are moved into the test set
        tmp = [trainExamples[x] for x in trainIdx]
        testExamples += [trainExamples[x] for x in temp]
        trainExamples = tmp

        counts = DataUtils.CountResults(trainExamples, bounds=bounds)
        message('Result Counts in training set:')
        # sorted() works on both lists and dict views (py2 and py3)
        for k in sorted(counts):
            message(str((k, counts[k])))
        counts = DataUtils.CountResults(testExamples, bounds=bounds)
        message('Result Counts in test set:')
        for k in sorted(counts):
            message(str((k, counts[k])))
    nExamples = len(trainExamples)
    message('Training with %d examples' % (nExamples))

    nVars = data.GetNVars()
    # needs to be a real list: entries are removed below
    attrs = list(range(1, nVars + 1))
    nPossibleVals = data.GetNPossibleVals()
    for i in range(1, len(nPossibleVals)):
        # variables flagged with -1 possible values are not used as attributes
        if nPossibleVals[i - 1] == -1:
            attrs.remove(i)

    if details.pickleDataFileName != '':
        # the context manager closes the file even if pickling fails
        with open(details.pickleDataFileName, 'wb+') as pickleDataFile:
            cPickle.dump(trainExamples, pickleDataFile)
            cPickle.dump(testExamples, pickleDataFile)

    if details.bayesModel:
        composite = BayesComposite.BayesComposite()
    else:
        composite = Composite.Composite()

    # record the run parameters on the composite itself
    composite._randomSeed = seed
    composite._splitFrac = details.splitFrac
    composite._shuffleActivities = details.shuffleActivities
    composite._randomizeActivities = details.randomActivities

    if hasattr(details, 'filterFrac'):
        composite._filterFrac = details.filterFrac
    if hasattr(details, 'filterVal'):
        composite._filterVal = details.filterVal

    composite.SetModelFilterData(details.modelFilterFrac,
                                 details.modelFilterVal)

    composite.SetActivityQuantBounds(details.activityBounds)
    nPossibleVals = data.GetNPossibleVals()
    if details.activityBounds:
        nPossibleVals[-1] = len(details.activityBounds) + 1

    if setDescNames:
        composite.SetInputOrder(data.GetVarNames())
        composite.SetDescriptorNames(details._descNames)
    else:
        composite.SetDescriptorNames(data.GetVarNames())
    composite.SetActivityQuantBounds(details.activityBounds)
    if details.nModels == 1:
        details.internalHoldoutFrac = 0.0
    if details.useTrees:
        from rdkit.ML.DecTree import CrossValidate, PruneTree
        if details.qBounds != []:
            from rdkit.ML.DecTree import BuildQuantTree
            builder = BuildQuantTree.QuantTreeBoot
        else:
            from rdkit.ML.DecTree import ID3
            builder = ID3.ID3Boot
        driver = CrossValidate.CrossValidationDriver
        pruner = PruneTree.PruneTree

        composite.SetQuantBounds(details.qBounds)
        nPossibleVals = data.GetNPossibleVals()
        if details.activityBounds:
            nPossibleVals[-1] = len(details.activityBounds) + 1
        composite.Grow(trainExamples,
                       attrs,
                       nPossibleVals=[0] + nPossibleVals,
                       buildDriver=driver,
                       pruner=pruner,
                       nTries=details.nModels,
                       pruneIt=details.pruneIt,
                       lessGreedy=details.lessGreedy,
                       needsQuantization=0,
                       treeBuilder=builder,
                       nQuantBounds=details.qBounds,
                       startAt=details.startAt,
                       maxDepth=details.limitDepth,
                       progressCallback=progressCallback,
                       holdOutFrac=details.internalHoldoutFrac,
                       replacementSelection=details.replacementSelection,
                       recycleVars=details.recycleVars,
                       randomDescriptors=details.randomDescriptors,
                       silent=not _verbose)

    elif details.useSigTrees:
        from rdkit.ML.DecTree import CrossValidate
        from rdkit.ML.DecTree import BuildSigTree
        builder = BuildSigTree.SigTreeBuilder
        driver = CrossValidate.CrossValidationDriver
        nPossibleVals = data.GetNPossibleVals()
        if details.activityBounds:
            nPossibleVals[-1] = len(details.activityBounds) + 1
        # these options are optional on the details object
        biasList = getattr(details, 'sigTreeBiasList', None)
        useCMIM = getattr(details, 'useCMIM', 0)
        allowCollections = getattr(details, 'allowCollections', False)
        composite.Grow(trainExamples,
                       attrs,
                       nPossibleVals=[0] + nPossibleVals,
                       buildDriver=driver,
                       nTries=details.nModels,
                       needsQuantization=0,
                       treeBuilder=builder,
                       maxDepth=details.limitDepth,
                       progressCallback=progressCallback,
                       holdOutFrac=details.internalHoldoutFrac,
                       replacementSelection=details.replacementSelection,
                       recycleVars=details.recycleVars,
                       randomDescriptors=details.randomDescriptors,
                       biasList=biasList,
                       useCMIM=useCMIM,
                       allowCollection=allowCollections,
                       silent=not _verbose)

    elif details.useKNN:
        from rdkit.ML.KNN import CrossValidate
        from rdkit.ML.KNN import DistFunctions

        driver = CrossValidate.CrossValidationDriver
        dfunc = ''
        if (details.knnDistFunc == "Euclidean"):
            dfunc = DistFunctions.EuclideanDist
        elif (details.knnDistFunc == "Tanimoto"):
            dfunc = DistFunctions.TanimotoDist
        else:
            assert 0, "Bad KNN distance metric value"

        composite.Grow(trainExamples,
                       attrs,
                       nPossibleVals=[0] + nPossibleVals,
                       buildDriver=driver,
                       nTries=details.nModels,
                       needsQuantization=0,
                       numNeigh=details.knnNeighs,
                       holdOutFrac=details.internalHoldoutFrac,
                       distFunc=dfunc)

    elif details.useNaiveBayes or details.useSigBayes:
        from rdkit.ML.NaiveBayes import CrossValidate
        driver = CrossValidate.CrossValidationDriver
        if not (hasattr(details, 'useSigBayes') and details.useSigBayes):
            composite.Grow(trainExamples,
                           attrs,
                           nPossibleVals=[0] + nPossibleVals,
                           buildDriver=driver,
                           nTries=details.nModels,
                           needsQuantization=0,
                           nQuantBounds=details.qBounds,
                           holdOutFrac=details.internalHoldoutFrac,
                           replacementSelection=details.replacementSelection,
                           mEstimateVal=details.mEstimateVal,
                           silent=not _verbose)
        else:
            useCMIM = getattr(details, 'useCMIM', 0)

            composite.Grow(trainExamples,
                           attrs,
                           nPossibleVals=[0] + nPossibleVals,
                           buildDriver=driver,
                           nTries=details.nModels,
                           needsQuantization=0,
                           nQuantBounds=details.qBounds,
                           mEstimateVal=details.mEstimateVal,
                           useSigs=True,
                           useCMIM=useCMIM,
                           holdOutFrac=details.internalHoldoutFrac,
                           replacementSelection=details.replacementSelection,
                           silent=not _verbose)


##   elif details.useSVM:
##     from rdkit.ML.SVM import CrossValidate
##     driver = CrossValidate.CrossValidationDriver
##     composite.Grow(trainExamples, attrs, nPossibleVals=[0]+nPossibleVals,
##                    buildDriver=driver, nTries=details.nModels,
##                    needsQuantization=0,
##                    cost=details.svmCost,gamma=details.svmGamma,
##                    weights=details.svmWeights,degree=details.svmDegree,
##                    type=details.svmType,kernelType=details.svmKernel,
##                    coef0=details.svmCoeff,eps=details.svmEps,nu=details.svmNu,
##                    cache_size=details.svmCache,shrinking=details.svmShrink,
##                    dataType=details.svmDataType,
##                    holdOutFrac=details.internalHoldoutFrac,
##                    replacementSelection=details.replacementSelection,
##                    silent=not _verbose)

    else:
        # fall back to the neural-network builder
        from rdkit.ML.Neural import CrossValidate
        driver = CrossValidate.CrossValidationDriver
        composite.Grow(trainExamples,
                       attrs, [0] + nPossibleVals,
                       nTries=details.nModels,
                       buildDriver=driver,
                       needsQuantization=0)

    composite.AverageErrors()
    composite.SortModels()
    modelList, counts, avgErrs = composite.GetAllData()
    counts = numpy.array(counts)
    avgErrs = numpy.array(avgErrs)
    composite._varNames = data.GetVarNames()

    for model in modelList:
        model.NameModel(composite._varNames)

    # do final statistics
    weightedErrs = counts * avgErrs
    averageErr = sum(weightedErrs) / sum(counts)
    devs = (avgErrs - averageErr)
    devs = devs * counts
    # sqrt(x*x) is just the absolute value of the count-weighted deviations
    devs = numpy.sqrt(devs * devs)
    avgDev = sum(devs) / sum(counts)
    message('# Overall Average Error: %%% 5.2f, Average Deviation: %%% 6.2f' %
            (100. * averageErr, 100. * avgDev))

    if details.bayesModel:
        composite.Train(trainExamples, verbose=0)

    # blow out the saved examples and then save the composite:
    composite.ClearModelExamples()
    if saveIt:
        composite.Pickle(details.outName)
    details.model = DbModule.binaryHolder(cPickle.dumps(composite))

    # NOTE(review): the statistics below are written to the module-level
    # _runDetails object, not to the `details` argument; this only matches
    # what details.Store() persists if both are the same object - confirm.
    badExamples = []
    if not details.detailedRes and (not hasattr(details, 'noScreen')
                                    or not details.noScreen):
        if details.splitRun:
            message('Testing all hold-out examples')
            wrong = testall(composite, testExamples, badExamples)
            message('%d examples (%% %5.2f) were misclassified' %
                    (len(wrong),
                     100. * float(len(wrong)) / float(len(testExamples))))
            _runDetails.holdout_error = float(len(wrong)) / len(testExamples)
        else:
            message('Testing all examples')
            wrong = testall(composite, namedExamples, badExamples)
            message('%d examples (%% %5.2f) were misclassified' %
                    (len(wrong),
                     100. * float(len(wrong)) / float(len(namedExamples))))
            _runDetails.overall_error = float(len(wrong)) / len(namedExamples)

    if details.detailedRes:
        message('\nEntire data set:')
        resTup = ScreenComposite.ShowVoteResults(range(data.GetNPts()), data,
                                                 composite, nPossibleVals[-1],
                                                 details.threshold)
        nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup
        nPts = len(namedExamples)
        nClass = nGood + nBad
        _runDetails.overall_error = float(nBad) / nClass
        _runDetails.overall_correct_conf = avgGood
        _runDetails.overall_incorrect_conf = avgBad
        _runDetails.overall_result_matrix = repr(voteTab)
        # NOTE(review): nRej is "classified minus total"; confirm the sign
        # is what is intended here.
        nRej = nClass - nPts
        if nRej > 0:
            _runDetails.overall_fraction_dropped = float(nRej) / nPts

        if details.splitRun:
            message('\nHold-out data:')
            resTup = ScreenComposite.ShowVoteResults(range(len(testExamples)),
                                                     testExamples, composite,
                                                     nPossibleVals[-1],
                                                     details.threshold)
            nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup
            nPts = len(testExamples)
            nClass = nGood + nBad
            _runDetails.holdout_error = float(nBad) / nClass
            _runDetails.holdout_correct_conf = avgGood
            _runDetails.holdout_incorrect_conf = avgBad
            _runDetails.holdout_result_matrix = repr(voteTab)
            nRej = nClass - nPts
            if nRej > 0:
                _runDetails.holdout_fraction_dropped = float(nRej) / nPts

    if details.persistTblName and details.dbName:
        message('Updating results table %s:%s' %
                (details.dbName, details.persistTblName))
        details.Store(db=details.dbName, table=details.persistTblName)

    if details.badName != '':
        with open(details.badName, 'w+') as badFile:
            # badExamples is only filled in by testall(); when that was not
            # run the list is empty and `wrong` is never touched.
            for i in range(len(badExamples)):
                ex = badExamples[i]
                vote = wrong[i]
                outStr = '%s\t%s\n' % (ex, vote)
                badFile.write(outStr)

    composite.ClearModelExamples()
    return composite
Пример #27
0
  def test1cPointND(self):
    """ exercises PointND: indexing, arithmetic, norms and pickling """

    def checkVals(vect, expected):
      # expected is a sequence of (index, value) pairs
      for idx, val in expected:
        self.assertTrue(feq(vect[idx], val))

    nDims = 4
    pt = geom.PointND(4)
    # a freshly built point is all zeros
    for idx in range(nDims):
      self.assertTrue(feq(pt[idx], 0.0))

    # item assignment, positive and negative indexing:
    pt[0] = 3
    pt[3] = 4
    checkVals(pt, ((0, 3.0), (3, 4.0), (-4, 3.0), (-1, 4.0)))
    asList = list(pt)
    checkVals(asList, ((0, 3.0), (3, 4.0)))

    other = geom.PointND(4)
    other[0] = 1.
    other[2] = 1.

    # addition, both binary and in place:
    summed = pt + other
    checkVals(summed, ((0, 4.0), (2, 1.0), (3, 4.0)))
    pt += other
    checkVals(pt, ((0, 4.0), (2, 1.0), (3, 4.0)))

    # subtraction, both binary and in place:
    diff = pt - other
    checkVals(diff, ((0, 3.0), (2, 0.0), (3, 4.0)))
    pt -= other
    checkVals(pt, ((0, 3.0), (2, 0.0), (3, 4.0)))

    # scaling up and back down:
    pt *= 2.0
    checkVals(pt, ((0, 6.0), (1, 0.0), (2, 0.0), (3, 8.0)))
    pt /= 2
    checkVals(pt, ((0, 3.0), (1, 0.0), (2, 0.0), (3, 4.0)))

    # norms, before and after normalization:
    self.assertTrue(feq(pt.Length(), 5.0))
    self.assertTrue(feq(pt.LengthSq(), 25.0))
    pt.Normalize()
    self.assertTrue(feq(pt.Length(), 1.0))

    # pickle round trip:
    restored = cPickle.loads(cPickle.dumps(pt))
    self.assertTrue(len(pt) == len(restored))
    for idx in range(len(pt)):
      self.assertTrue(feq(restored[idx], pt[idx]))
Пример #28
0
from __future__ import print_function
from rdkit import RDConfig
from rdkit.Dbase import DbModule
from rdkit.Dbase.DbConnection import DbConnect
from rdkit.six.moves import cPickle
from rdkit import Chem

# Pick the database to work on depending on the RDConfig.usePgSQL setting.
if RDConfig.usePgSQL:
    dbName = "::RDTests"
else:
    dbName = "data.sqlt"

molTblName = 'simple_mols1'
fpTblName = 'simple_mols1_fp'
conn = DbConnect(dbName, molTblName)
# The fingerprint table holds the molecule id plus the pickled fingerprint
# in a binary column.
conn.AddTable(fpTblName,
              'id varchar(10),autofragmentfp %s' % DbModule.binaryTypeName)
d = conn.GetData()
# `molId` rather than `id`, so the builtin id() is not shadowed at module scope
for smi, molId in d:
    print(repr(molId), repr(smi))
    mol = Chem.MolFromSmiles(smi)
    fp = Chem.RDKFingerprint(mol)
    pkl = cPickle.dumps(fp)
    conn.InsertData(fpTblName, (molId, DbModule.binaryHolder(pkl)))
conn.Commit()
Пример #29
0
  def test3Pickles(self):
    """ checks pickling of DiscreteValueVects of every value type

    Each vector is round-tripped through pickle and is also compared with
    the next pickle read from the reference file testData/dvvs.pkl, so the
    vectors must be built in the same order as they are stored in that file.

    """
    refName = os.path.join(RDConfig.RDBaseDir,
                           'Code/DataStructs/Wrap/testData/dvvs.pkl')
    with open(refName, 'r') as inTF:
      # normalize line endings before handing the data to pickle
      buf = inTF.read().replace('\r\n', '\n').encode('utf-8')

    with io.BytesIO(buf) as inF:

      def checkVect(v1):
        # in-memory round trip:
        v2 = pickle.loads(pickle.dumps(v1))
        self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0)
        # comparison with the next stored pickle:
        v2 = pickle.load(inF, encoding='bytes')
        self.assertTrue(ds.ComputeL1Norm(v1, v2) == 0)
        self.assertTrue(v1.GetTotalVal() == v2.GetTotalVal())
        self.assertTrue(v2.GetTotalVal() != 0)

      v1 = ds.DiscreteValueVect(ds.DiscreteValueType.ONEBITVALUE, 30)
      for i in range(15):
        v1[2 * i] = 1
      checkVect(v1)

      v1 = ds.DiscreteValueVect(ds.DiscreteValueType.TWOBITVALUE, 30)
      for i in range(30):
        v1[i] = i % 4
      checkVect(v1)

      v1 = ds.DiscreteValueVect(ds.DiscreteValueType.FOURBITVALUE, 16)
      for i in range(16):
        v1[i] = i % 16
      checkVect(v1)

      v1 = ds.DiscreteValueVect(ds.DiscreteValueType.EIGHTBITVALUE, 5)
      for i, val in enumerate((34, 167, 3, 56, 128)):
        v1[i] = val
      checkVect(v1)

      v1 = ds.DiscreteValueVect(ds.DiscreteValueType.SIXTEENBITVALUE, 3)
      for i, val in enumerate((2345, 64578, 34)):
        v1[i] = val
      checkVect(v1)
Пример #30
0
from __future__ import print_function
from rdkit import RDConfig
from rdkit.Dbase import DbModule
from rdkit.Dbase.DbConnection import DbConnect
from rdkit.six.moves import cPickle
from rdkit import Chem

# Pick the database to work on depending on the RDConfig.usePgSQL setting.
if RDConfig.usePgSQL:
  dbName = "::RDTests"
else:
  dbName = "data.sqlt"

molTblName = 'simple_mols1'
fpTblName = 'simple_mols1_fp'
conn = DbConnect(dbName, molTblName)
# The fingerprint table holds the molecule id plus the pickled fingerprint
# in a binary column.
conn.AddTable(fpTblName, 'id varchar(10),autofragmentfp %s' % DbModule.binaryTypeName)
d = conn.GetData()
# `molId` rather than `id`, so the builtin id() is not shadowed at module scope
for smi, molId in d:
  print(repr(molId), repr(smi))
  mol = Chem.MolFromSmiles(smi)
  fp = Chem.RDKFingerprint(mol)
  pkl = cPickle.dumps(fp)
  conn.InsertData(fpTblName, (molId, DbModule.binaryHolder(pkl)))
conn.Commit()