def test_MetaDataHandleForSavingModel(self):
    """Test the handling of SaveModel for data with meta attributes.

    An RF model is trained on ``self.WMetaTest`` (which must carry at least
    one meta attribute), written to a scratch directory and read back.
    The test checks that:
      * the domain of the re-loaded model has no meta attributes;
      * the classification accuracy on ``self.NoMetaTrain`` and on
        ``self.WMetaTest`` is the same before saving and after loading;
      * the accuracies match the expected reference values (9 decimals).
    """
    # Local import so that this method does not depend on the file's import block.
    import shutil

    expectedAccWMeta = 1.0  # Ver 0.3
    expectedAccNoMetaValues = [
        0.56666666700000001,  # Ver 0.3
        0.563636364,
    ]

    # Test the save of a model created from a train data with meta attributes
    self.assertTrue(
        len(self.WMetaTest.domain.getmetas()) >= 1,
        "The dataset WMetaTest should have Meta Attributes")

    RFlearner = AZorngRF.RFLearner(
        NumThreads=1, maxDepth="20", minSample="5", useSurrogates="false",
        getVarVariance="false", nActVars="0", nTrees="100",
        forestAcc="0.1", termCrit="0")
    rfM = RFlearner(self.WMetaTest)
    AccNoMetaBefore = evalUtilities.getClassificationAccuracy(
        self.NoMetaTrain, rfM)
    AccWMetaBefore = evalUtilities.getClassificationAccuracy(
        self.WMetaTest, rfM)

    scratchdir = os.path.join(AZOC.SCRATCHDIR,
                              "scratchdirTest" + str(time.time()))
    os.mkdir(scratchdir)
    # try/finally: the scratch directory must be removed even when one of
    # the assertions below fails.
    try:
        # Save the model
        modelPath = os.path.join(scratchdir, "RFModel.RF")
        rfM.write(modelPath)

        # Read in the model
        rfR = AZorngRF.RFread(modelPath)
        self.assertTrue(
            len(rfR.domain.getmetas()) == 0,
            "There shouldn't be any Meta data now!")

        # Calculate classification accuracy
        AccNoMetaAfter = evalUtilities.getClassificationAccuracy(
            self.NoMetaTrain, rfR)
        AccWMetaAfter = evalUtilities.getClassificationAccuracy(
            self.WMetaTest, rfR)

        # Test that the accuracy of the model is the same before and after saving
        self.assertEqual(
            AccNoMetaBefore, AccNoMetaAfter,
            "NoMeta: Predictions after loading saved model were different")
        self.assertEqual(
            AccWMetaBefore, AccWMetaAfter,
            "WMeta: Predictions after loading saved model were different")
        self.assertEqual(round(AccWMetaAfter, 9), round(expectedAccWMeta, 9))
        self.assertRoundedToExpectedArray(AccNoMetaAfter,
                                          expectedAccNoMetaValues, 9)
    finally:
        # Remove the scratch directory (no shell involved, missing dir tolerated)
        shutil.rmtree(scratchdir, ignore_errors=True)
def test_save_load_Regression_D_Attr(self):
    """Test Save/Load of a regression model with a discrete attribute.

    The model is trained on ``self.RegDAttr`` with the second attribute
    (index 1) removed, while predictions are requested for the examples of
    the full ``self.RegDAttr`` table.  The predictions of the in-memory
    model must equal those of the model re-loaded from disk, and both must
    equal the stored reference predictions.
    """
    # Local import so that this method does not depend on the file's import block.
    import shutil

    # Create a selector to select just the correct attributes.
    # list(...) keeps this working where range() does not return a list.
    selector = list(range(len(self.RegDAttr.domain)))
    # Remove the second attribute (idx=1)
    selector.pop(1)
    # Apply the selector to the self.RegDAttr
    data = self.RegDAttr.select(selector)

    RFsign = AZorngRF.RFLearner(data, nTrees=200, nActVars=155, maxDepth=100)
    res1 = [str(RFsign(ex)) for ex in self.RegDAttr]

    scratchdir = os.path.join(AZOC.SCRATCHDIR,
                              "scratchdirTest" + str(time.time()))
    os.mkdir(scratchdir)
    # try/finally: the scratch directory must be removed even when one of
    # the assertions below fails.
    try:
        modelPath = os.path.join(scratchdir, "RFModel")
        RFsign.write(modelPath)
        loadedRFmodel = AZorngRF.RFread(modelPath)
        res2 = [str(loadedRFmodel(ex)) for ex in self.RegDAttr]

        self.assertEqual(res1, res2)
        self.assertEqual(res1, [
            '5.404782', '2.568249', '2.979486', '4.287185', '5.335753',
            '4.439877', '3.682451', '8.054751', '6.511803', '5.760388',
            '7.771009', '2.328262', '6.062288', '5.577081', '3.639579',
            '6.862591', '3.793468', '2.865258', '3.531777', '6.833398',
            '6.376686', '3.338588', '7.002612', '7.137580', '7.258987',
            '6.899173', '7.547265', '8.708020', '6.262212', '7.563741',
            '8.166364', '6.614120', '7.865033', '9.060866', '8.057292',
            '4.877943', '7.993115', '9.198319', '9.428467', '8.537990',
            '9.130789', '6.328936', '8.247712', '7.605743', '8.755456',
            '6.983065', '7.712387', '9.972745', '9.763152', '7.934700',
            '8.447981', '7.272462', '8.824869', '7.654151', '7.795481',
            '7.229007', '8.680950', '9.439033', '9.130064', '8.505672',
            '8.082146', '6.086042', '7.493593', '8.981513', '8.880632',
            '6.548739',
        ])
    finally:
        # Remove the scratch directory (no shell involved, missing dir tolerated)
        shutil.rmtree(scratchdir, ignore_errors=True)
def test_Priors(self):
    """Test to assure that priors are set correctly.

    An RF model is trained on ``self.irisData`` with explicit class priors,
    saved and re-loaded.  The accuracy on ``self.irisData`` must be the same
    before saving and after loading, and the priors found in the saved
    ``model.rf`` file must match the ones given to the learner (compared
    after rounding to 2 decimals, indexed by class value).
    """
    # Local import so that this method does not depend on the file's import block.
    import shutil

    # Create a RF model
    RFlearner = AZorngRF.RFLearner(
        NumThreads=1, maxDepth="20", minSample="5", useSurrogates="false",
        getVarVariance="false", nActVars="0", nTrees="100",
        forestAcc="0.1", termCrit="0",
        priors={"Iris-versicolor": 0.35,
                "Iris-virginica": 0.13,
                "Iris-setosa": 0.52})
    RFmodel = RFlearner(self.irisData)

    # Calculate classification accuracy
    Acc = evalUtilities.getClassificationAccuracy(self.irisData, RFmodel)

    scratchdir = os.path.join(AZOC.SCRATCHDIR,
                              "scratchdirTest" + str(time.time()))
    os.mkdir(scratchdir)
    # try/finally: the scratch directory must be removed even when one of
    # the assertions below fails.
    try:
        # Save the model
        modelPath = os.path.join(scratchdir, "modelPriors.RF")
        RFmodel.write(modelPath)

        # Read in the model
        newRFmodel = AZorngRF.RFread(modelPath)

        # Calculate classification accuracy
        savedAcc = evalUtilities.getClassificationAccuracy(
            self.irisData, newRFmodel)

        # Test that the accuracy of the two classifiers is the exact same
        self.assertEqual(Acc, savedAcc)

        # Check the priors saved in the model
        with open(os.path.join(modelPath, "model.rf"), "r") as modelFile:
            lines = modelFile.readlines()

        # The priors are taken from two fixed lines (zero-based 22 and 23)
        # of the model file, joined, with the "data:" label removed.
        # NOTE(review): the fixed line indices presumably depend on the
        # layout of the written model file - confirm if the format changes.
        # NOTE(review): eval() executes whatever text is on those lines; it
        # is acceptable only because the file was written by this test.
        priorsText = (lines[22].strip() + lines[23].strip()).replace(
            "data:", "")
        priors = [round(x, 2) for x in eval(priorsText)]

        classValues = self.irisData.domain.classVar.values
        self.assertEqual(len(priors), 3)
        self.assertEqual(priors[classValues.index("Iris-setosa")], 0.52)
        self.assertEqual(priors[classValues.index("Iris-versicolor")], 0.35)
        self.assertEqual(priors[classValues.index("Iris-virginica")], 0.13)
    finally:
        # Remove the scratch directory (no shell involved, missing dir tolerated)
        shutil.rmtree(scratchdir, ignore_errors=True)
def test_SavedModel(self):
    """Test to assure that a saved RF model gives the same predictions as before saving.

    An RF model is trained on ``self.trainData``, saved and re-loaded.  The
    accuracy on ``self.testData`` must be the same before saving and after
    loading.  No priors are given to the learner; the priors found in the
    saved ``model.rf`` file are expected to be 0.50 for both the "POS" and
    the "NEG" class (compared after rounding to 2 decimals).
    """
    # Local import so that this method does not depend on the file's import block.
    import shutil

    # Create a RF model
    RFlearner = AZorngRF.RFLearner(
        maxDepth="20", minSample="5", useSurrogates="false",
        getVarVariance="false", nActVars="0", nTrees="100",
        forestAcc="0.1", termCrit="0")
    RFmodel = RFlearner(self.trainData)

    # Calculate classification accuracy
    Acc = evalUtilities.getClassificationAccuracy(self.testData, RFmodel)

    scratchdir = os.path.join(AZOC.SCRATCHDIR,
                              "scratchdirTest" + str(time.time()))
    os.mkdir(scratchdir)
    # try/finally: the scratch directory must be removed even when one of
    # the assertions below fails.
    try:
        # Save the model
        modelPath = os.path.join(scratchdir, "model.RF")
        RFmodel.write(modelPath)

        # Read in the model
        newRFmodel = AZorngRF.RFread(modelPath)

        # Calculate classification accuracy
        savedAcc = evalUtilities.getClassificationAccuracy(
            self.testData, newRFmodel)

        # Test that the accuracy of the two classifiers is the exact same
        self.assertEqual(Acc, savedAcc)

        # Check the priors saved in the model
        with open(os.path.join(modelPath, "model.rf"), "r") as modelFile:
            lines = modelFile.readlines()

        # The priors are taken from one fixed line (zero-based 22) of the
        # model file, with the "data:" label removed.
        # NOTE(review): the fixed line index presumably depends on the
        # layout of the written model file - confirm if the format changes.
        # NOTE(review): eval() executes whatever text is on that line; it
        # is acceptable only because the file was written by this test.
        priorsText = lines[22].strip().replace("data:", "")
        priors = [round(x, 2) for x in eval(priorsText)]

        classValues = self.testData.domain.classVar.values
        self.assertEqual(len(priors), 2)
        self.assertEqual(priors[classValues.index("POS")], 0.50)
        self.assertEqual(priors[classValues.index("NEG")], 0.50)
    finally:
        # Remove the scratch directory (no shell involved, missing dir tolerated)
        shutil.rmtree(scratchdir, ignore_errors=True)
def TopVarImportanceTest(data, expectNone=False):
    """Compare getTopImportantVars(ex, 1) of an RF model before and after a save/load cycle.

    An RF model is built from 'data' and queried once per example.  The
    model is then written to a scratch directory, read back, the scratch
    directory is removed, and the re-loaded model is queried for the same
    examples.

    Returns:
        With expectNone=True: whether both result lists are equal and
        consist only of None (one per example).
        Otherwise: whether both result lists are equal, contain no None,
        and are not made of one single repeated value.
    """
    trainedRF = AZorngRF.RFLearner(data)
    beforeSave = [trainedRF.getTopImportantVars(example, 1) for example in data]

    # Round trip through disk; the directory is removed before the loaded
    # model is queried, exactly as in the original sequence.
    tmpDir = miscUtilities.createScratchDir(desc="TopVarImportanceTest")
    savedPath = os.path.join(tmpDir, "CvRFModel")
    trainedRF.write(savedPath)
    reloadedRF = AZorngRF.RFread(savedPath)
    miscUtilities.removeDir(tmpDir)

    afterLoad = [reloadedRF.getTopImportantVars(example, 1) for example in data]

    if expectNone:
        return beforeSave == afterLoad == [None] * len(data)
    return (beforeSave == afterLoad
            and None not in beforeSave
            and beforeSave.count(beforeSave[0]) != len(beforeSave))
def test_BuiltIn_Impute(self):
    """Test RF built-in missing values imputation.

    Assure that imputation works for the RF models; test on data with
    missing values.  One variable of each of two test examples is set to
    missing ('?') and the test checks that:
      * the prediction with the missing value differs from the prediction
        for the original example;
      * for the first example, it also differs from the prediction obtained
        when the variable is replaced by 0 (i.e. missing values are not
        simply treated as 0);
      * a model saved to disk and re-loaded keeps
        ``useBuiltInMissValHandling`` set and gives the same predictions
        for the examples with missing values.
    """
    # Local import so that this method does not depend on the file's import block.
    import shutil

    # This data is loaded here to speed up the test suite since it is too big
    contTestDataPath = os.path.join(AZOC.AZORANGEHOME,
                                    "tests/source/data/linearTest.tab")
    contTrainDataPath = os.path.join(AZOC.AZORANGEHOME,
                                     "tests/source/data/linearTrain.tab")
    contTrain = dataUtilities.DataTable(contTrainDataPath)
    contTest = dataUtilities.DataTable(contTestDataPath)

    ex1 = contTest[5]
    ex2 = contTest[2]
    AttrEx1 = "Desc 71"
    AttrEx2 = "Desc 72"

    # The messages are built from the attribute names so that they always
    # report the variable that is really being checked.
    self.assertTrue(ex1[AttrEx1] != "?",
                    "The var %s shouldn't be missing!" % AttrEx1)
    self.assertTrue(ex2[AttrEx2] != "?",
                    "The var %s shouldn't be missing!" % AttrEx2)

    RFlearner = AZorngRF.RFLearner(
        NumThreads=1, maxDepth="20", minSample="5", useSurrogates="false",
        getVarVariance="false", nActVars="0", nTrees="100",
        forestAcc="0.001", termCrit="0", useBuiltInMissValHandling=True)
    rf = RFlearner(contTrain)

    # Prediction for data as it is
    P1 = rf(ex1)
    P2 = rf(ex2)

    # Prediction after changing the selected variable of example 1 to 0
    ex1[AttrEx1] = 0
    P1_0 = rf(ex1)

    # Predictions after changing the selected variables to '?', which means
    # that the imputation has to be made inside the classifier.
    ex1[AttrEx1] = "?"
    ex2[AttrEx2] = "?"
    self.assertTrue(ex1[AttrEx1] == "?",
                    "The var %s should be missing now!" % AttrEx1)
    self.assertTrue(ex2[AttrEx2] == "?",
                    "The var %s should be missing now!" % AttrEx2)
    P1Miss = rf(ex1)
    P2Miss = rf(ex2)

    # Assure that if other substitutions on those variables were made, the
    # predicted value would be different, and so, this is a valid method
    # for testing the imputation.
    # NOTE: the comparisons against an external average imputer and the
    # zero-replacement check for example 2 were already disabled in the
    # original test and are intentionally not asserted here.
    self.assertTrue(
        P1.value != P2.value)  # Just to assure that we are not comparing equal examples
    self.assertTrue(
        P1.value != P1Miss.value,
        "The imputed 1 was the same as the original ... try other example")
    self.assertTrue(
        P1_0.value != P1Miss.value,
        "The imputed 1 was the same as the replaced by 0. The classifier may be replacing missing values by 0")
    self.assertTrue(
        P2.value != P2Miss.value,
        "The missing imputed 2 was the same as the original ... try other example")

    self.assertTrue(rf.useBuiltInMissValHandling == True)

    # Test the imputer for saved models
    scratchdir = os.path.join(AZOC.SCRATCHDIR,
                              "scratchdirTest" + str(time.time()))
    os.mkdir(scratchdir)
    # try/finally: the scratch directory must be removed even when one of
    # the assertions below fails.
    try:
        # Save the model
        modelPath = os.path.join(scratchdir, "RFModel")
        rf.write(modelPath)

        # Read in the model
        rfM = AZorngRF.RFread(modelPath)
        self.assertTrue(rfM.useBuiltInMissValHandling == True)

        # Predict the ex1 and ex2 which are still the examples with missing values '?'
        self.assertTrue(ex1[AttrEx1] == "?",
                        "Value of Var %s should be missing!" % AttrEx1)
        self.assertTrue(ex2[AttrEx2] == "?",
                        "Value of Var %s should be missing!" % AttrEx2)
        self.assertTrue(
            rfM(ex1) == P1Miss, "Imputation on loaded model is not correct")
        self.assertTrue(
            rfM(ex2) == P2Miss, "Imputation on loaded model is not correct")
    finally:
        # Remove the scratch directory (no shell involved, missing dir tolerated)
        shutil.rmtree(scratchdir, ignore_errors=True)
for ex in data: mol = Chem.MolFromSmiles(ex["Smiles"].value) if mol: molList.append(mol) else: print ex["Smiles"].value print ex["Leonumber"].value fps = [FingerprintMols.FingerprintMol(x) for x in molList] # Topological #fps = [AllChem.GetMorganFingerprint(x, 2) for x in molList] #print "Length of data and fp ", len(data), len(fps) return fps THRS = 0.75 model = AZorngRF.RFread("OI_RFmodel") predictor = AZOrangePredictor.AZOrangePredictor("OI_RFmodel") train = dataUtilities.DataTable("BioActivityAZOdesc.txt") # Calculate fingerprints for train and test sets fps = getFps(train) #smiles = test[idx]["Smiles"].value smiles = "CC(C)n1c(/C=C/[C@H](O)C[C@H](O)CC(=O)O)c(-c2ccc(F)cc2)c2ccccc21" smiles = "Cc1cc(=Nc2cc(CN3CCOCC3)c3nc(C)c(Cc4ccc(Cl)cc4F)n3n2)[nH][nH]1" # Train set #smiles = "Cc1nc2c(CN3CCOCC3)cc(NC3=CC(C)NN3)nn2c1Cc1ccc(Cl)cc1F" # From Drawing - Wrong no tautomer smiles = "Cc1cc(Nc2cc(CN3CCOCC3)c3nc(C)c(Cc4ccc(Cl)cc4F)n3n2)[nH]n1" #From drawing of Galilei structure #smiles = "Cc1cc(=Nc2cc(CN3CCOCC3)c3nc(C)c(Cc4ccc(Cl)cc4F)n3n2)[nH][nH]1" # Canonicalized from drawing in Galilei cmd = "env -i HOME='$HOME' bash -l -c './cleanSmiles.sh " + '"' + smiles + '"' + "'" print cmd
def modelRead(modelFile=None, verbose=0, retrunClassifier=True):
    """Get the type of model saved in 'modelFile' and load the respective model.

    modelRead (modelFile [, verbose = 0] [, retrunClassifier = True] )

    The model type is recognised from a marker file or directory inside
    'modelFile'.  The markers are probed in a fixed order and the first one
    found decides the type.  A directory without any known marker is
    assumed to be an RF model in the old format (back-compatibility).

    Parameters:
        modelFile        -- path of the saved model.  When empty or None no
                            model is read (see Returns).
        verbose          -- passed unchanged to the reader of the model.
        retrunClassifier -- (sic - the spelling is part of the interface)
                            True to get the loaded classifier, False to get
                            just a string with the model type.

    Returns:
        - a tuple with the known classifier types when called without
          'modelFile';
        - the model type string when 'retrunClassifier' is False;
        - the loaded classifier otherwise;
        - None when the type cannot be determined, or when loading through
          the old-format RF fallback fails.

    Errors raised while loading a model of a recognised type are not
    caught here.
    """
    if not modelFile:
        # NOTE(review): this list advertises "CvRF", while RF models found
        # on disk are reported below as "RF" - confirm which one callers expect.
        return ("SignSVM", "CvSVM", "CvANN", "PLS", "CvRF", "CvBoost",
                "CvBayes", "Consensus")

    # (marker inside the model directory, test the marker must pass, type).
    # The order matters: the first matching marker wins.
    markers = (
        ("model.svm", os.path.isfile, "CvSVM"),
        ("model.SignSvm", os.path.isdir, "SignSVM"),
        ("model.ann", os.path.isfile, "CvANN"),
        ("Model.pls", os.path.isfile, "PLS"),
        ("model.rf", os.path.isfile, "RF"),
        ("C0.model", os.path.isdir, "Consensus"),
        ("model.boost", os.path.isfile, "CvBoost"),
        ("model.bayes", os.path.isfile, "CvBayes"),
    )
    modelType = None
    for marker, markerExists, candidateType in markers:
        if markerExists(os.path.join(modelFile, marker)):
            modelType = candidateType
            break

    if modelType is None:
        # Assuming an RF old format for backcompatibility
        if not os.path.isdir(modelFile):
            return None
        if not retrunClassifier:
            return "RF"
        try:
            from trainingMethods import AZorngRF
            return AZorngRF.RFread(modelFile, verbose)
        except Exception:
            # Best effort: a directory that is not a readable RF model is
            # reported as "unknown".  Exception (not a bare except) so that
            # KeyboardInterrupt and SystemExit are not swallowed.
            return None

    if not retrunClassifier:
        return modelType

    # The training method modules are imported lazily, only for the type
    # that is actually being loaded.
    if modelType == "CvSVM":
        from trainingMethods import AZorngCvSVM
        return AZorngCvSVM.CvSVMread(modelFile, verbose)
    if modelType == "SignSVM":
        from trainingMethods import AZorngSignSVM
        return AZorngSignSVM.SignSVMread(modelFile, verbose)
    if modelType == "CvANN":
        from trainingMethods import AZorngCvANN
        return AZorngCvANN.CvANNread(modelFile, verbose)
    if modelType == "PLS":
        from trainingMethods import AZorngPLS
        return AZorngPLS.PLSread(modelFile, verbose)
    if modelType == "RF":
        from trainingMethods import AZorngRF
        return AZorngRF.RFread(modelFile, verbose)
    if modelType == "Consensus":
        from trainingMethods import AZorngConsensus
        return AZorngConsensus.Consensusread(modelFile, verbose)
    if modelType == "CvBoost":
        from trainingMethods import AZorngCvBoost
        return AZorngCvBoost.CvBoostread(modelFile, verbose)
    # Only "CvBayes" is left
    from trainingMethods import AZorngCvBayes
    return AZorngCvBayes.CvBayesread(modelFile, verbose)