def testModelSaveAndLoad(self):
    """
    Check that a keywords model classifies identically after being saved and
    loaded back.

    The keywords model uses the base class implementations of the save/load
    methods, so this exercises that shared code.
    """
    self.modelDir = "poke_model"
    model = ClassificationModelKeywords(modelDir=self.modelDir, verbosity=0)

    # Keyed by sample index; each value pairs a token list with a numpy array
    # (presumably the label indices for that sample).
    samples = {
        0: (["Pickachu"], numpy.array([0, 2, 2])),
        1: (["Eevee"], numpy.array([2])),
        2: (["Charmander"], numpy.array([0, 1, 1])),
        3: (["Abra"], numpy.array([1])),
        4: (["Squirtle"], numpy.array([1, 0, 1])),
    }

    patterns = model.encodeSamples(samples)
    for sampleIdx in xrange(len(samples)):
        model.trainModel(sampleIdx)

    # Classify with the in-memory model before it is written out.
    numPatterns = len(patterns)
    expected = [model.testModel(idx) for idx in xrange(numPatterns)]

    model.saveModel()
    restored = ClassificationModel(verbosity=0).loadModel(self.modelDir)
    actual = [restored.testModel(idx) for idx in xrange(numPatterns)]

    mismatchMsg = (
        "Output classifcations from loaded model don't match original model's."
    )
    for before, after in zip(expected, actual):
        self.assertSequenceEqual(before.tolist(), after.tolist(), mismatchMsg)
def createModel(self, modelName, loadPath, savePath, *modelFactoryArgs,
                **modelFactoryKwargs):
    """
    Load a previously trained model from loadPath, or build and train a new
    one.

    When loadPath is truthy the caller expects a model to exist there: an
    IOError from loading is converted to ImbuUnableToLoadModelError and no new
    model is created. When loadPath is falsy, an IOError from loading causes a
    new model to be built through self._modelFactory and trained through
    self.train(model, savePath).

    @return The loaded or newly trained model.
    """
    # The model name must be an identifier defined in the model factory
    # mapping. The looked-up value is not used; an unknown name makes getattr
    # raise AttributeError here.
    getattr(ClassificationModelTypes, self._mapModelName(modelName))

    mustExist = bool(loadPath)

    # NOTE(review): the load is attempted even when loadPath is falsy, so the
    # "create a new model" path depends on load() raising IOError for such a
    # path -- confirm this is intended.
    try:
        return ClassificationModel.load(loadPath)
    except IOError as exc:
        if mustExist:
            # User gave an explicit path and it could not be loaded; DO NOT
            # fall back to creating a new model.
            raise ImbuUnableToLoadModelError(exc)

    # No load path was specified: gracefully create and train a new model.
    model = self._modelFactory(modelName, savePath, *modelFactoryArgs,
                               **modelFactoryKwargs)
    self.train(model, savePath)
    return model
def testNoWinningLabels(self):
    """An inference result with 0/4 nonzero classes yields no winning labels."""
    model = ClassificationModel()
    allZeroInference = numpy.array([0, 0, 0, 0])

    winners = model.getWinningLabels(allZeroInference)

    self.assertFalse(winners)
def testCalculateAccuracyMultipleSamples(self):
    """
    Tests the calculateAccuracy() method of the classification model base
    class for three test samples.
    """
    model = ClassificationModel()

    # One entry per sample; the second prediction is the placeholder [None].
    actual = [
        numpy.array([0]),
        numpy.array([0, 2]),
        numpy.array([0, 1, 2]),
    ]
    predicted = [
        numpy.array([0]),
        [None],
        numpy.array([1, 2, 0]),
    ]

    accuracy = model.calculateAccuracy([predicted, actual])

    self.assertAlmostEqual(accuracy, 2.0 / 3)
def createModel(modelName, modelFactory):
    """
    Load the named model from its cache directory, or train it from scratch,
    and register a proxy for it in g_models under modelName.

    Nothing is returned; the SynchronousBackgroundModelProxy wrapping the model
    is stored in the module-level g_models mapping.

    Raises ValueError when the model cannot be loaded and modelFactory is None,
    and NotImplementedError when an "HTMNetwork" model would have to be
    trained from scratch.
    """
    global g_models

    modelDir = os.path.join(_MODEL_CACHE_DIR_PREFIX, modelName)

    try:
        print("Attempting to load from %s" % modelDir)
        model = ClassificationModel.loadModel(modelDir)
        modelProxy = SynchronousBackgroundModelProxy(model)
        print("Model loaded from %s" % modelDir)
    except IOError:
        print("Model failed to load from %s Let's train it from scratch."
              % modelDir)

        if modelFactory is None:
            raise ValueError(
                "Could not instantiate model '{}'.".format(modelName))
        if modelName == "HTMNetwork":
            raise NotImplementedError()

        # Cortical.io models differ only in which EncoderTypes member is used.
        cioFingerprintAttr = {
            "CioWordFingerprint": "word",
            "CioDocumentFingerprint": "document",
        }
        if modelName in cioFingerprintAttr:
            model = modelFactory(
                retina=os.environ["IMBU_RETINA_ID"],
                apiKey=os.environ["CORTICAL_API_KEY"],
                fingerprintType=getattr(EncoderTypes,
                                        cioFingerprintAttr[modelName]),
                modelDir=modelDir,
                cacheRoot=_MODEL_CACHE_DIR_PREFIX)
        else:
            model = modelFactory(modelDir=modelDir)

        model.verbosity = 0
        model.numLabels = 0
        modelProxy = SynchronousBackgroundModelProxy(model)

        # Prepare and encode the CSV data, then train on every sample index.
        samples = modelProxy.prepData(g_csvdata, False)
        modelProxy.encodeSamples(samples)
        for sampleIdx in xrange(len(samples)):
            modelProxy.trainModel(sampleIdx)

        print("Model trained, save it.")
        modelProxy.saveModel()
        print("Model saved")

    g_models[modelName] = modelProxy
def runExperiment(args):
    """
    Create a model according to args, train it on a subset of the data, save
    it, restore it, and test the restored model on both the training subset
    and the remaining data.

    @return The originally trained model (not the restored copy).
    """
    (dataSet, labelRefs, documentCategoryMap,
     documentTextMap) = readDataAndReshuffle(args)

    # Train only with documents whose id's (third field of each sample) are
    # divisible by 100; everything else is held out for testing.
    trainingData = [sample for sample in dataSet if sample[2] % 100 == 0]
    testData = [sample for sample in dataSet if sample[2] % 100 != 0]
    print("Num training %s num testing %s"
          % (len(trainingData), len(testData)))

    # Create, train and persist the model.
    model = trainModel(args, instantiateModel(args), trainingData, labelRefs)
    model.save(args.modelDir)

    # Evaluate the copy that was read back from disk.
    restoredModel = ClassificationModel.load(args.modelDir)
    for evalData in (trainingData, testData):
        testModel(args, restoredModel, evalData, labelRefs,
                  documentCategoryMap)

    return model
def runExperiment(args):
    """
    Create a model according to args, train it on the first 80% of _DATASET,
    test it on the full dataset, then save and reload it and confirm the
    reloaded model gives the same test result.
    """
    # Create the model and train it on the leading 80% of the dataset.
    numTraining = int(len(_DATASET) * 0.80)
    model = trainModel(instantiateModel(args), _DATASET[:numTraining])

    # Test model on the full dataset.
    accuracyBefore = testModel(model, _DATASET)

    # Validate serialization - testing after reloading should give same result.
    model.save(args.modelDir)
    reloadedModel = ClassificationModel.load(args.modelDir)
    print("")
    print("Testing serialization...")
    accuracyAfter = testModel(reloadedModel, _DATASET)

    if accuracyBefore == accuracyAfter:
        verdict = "Serialization validated."
    else:
        verdict = (
            "Inconsistent results before ({}) and after ({}) saving/loading "
            "the model!".format(accuracyBefore, accuracyAfter))
    print(verdict)
def runExperiment(args):
    """
    Train a model built from args on the first 80% of _DATASET, test it on all
    of _DATASET, and check that a save/load round trip leaves the test result
    unchanged.
    """
    model = instantiateModel(args)

    # The first 80% of the dataset is used for training.
    splitIndex = int(len(_DATASET) * 0.80)
    trainingSlice = _DATASET[:splitIndex]
    model = trainModel(model, trainingSlice)

    # Baseline result on the full dataset, taken before serialization.
    baselinePct = testModel(model, _DATASET)

    # Round-trip through disk; the reloaded model should score the same.
    model.save(args.modelDir)
    restored = ClassificationModel.load(args.modelDir)
    print("")
    print("Testing serialization...")
    restoredPct = testModel(restored, _DATASET)

    if baselinePct == restoredPct:
        print("Serialization validated.")
        return
    print("Inconsistent results before ({}) and after ({}) saving/loading "
          "the model!".format(baselinePct, restoredPct))
def _executeModelLifecycle(self, modelName, modelDir):
    """
    Create a model from self.modelParams, train it on self.dataSet, save it to
    modelDir, and return a fresh copy loaded from modelDir.
    """
    trained = trainModel(createModel(modelName, **self.modelParams),
                         self.dataSet)
    trained.save(modelDir)

    # Drop the in-memory instance so only the reloaded copy is handed back.
    del trained

    return ClassificationModel.load(modelDir)
def testWinningLabels(self):
    """
    Tests whether the classification base class returns multiple winning
    labels correctly.
    """
    model = ClassificationModel()

    mixedInference = numpy.array([3, 1, 4, 0, 1, 0])
    sparseInference = numpy.array([3, 0, 4, 0, 0, 0])

    # (inference result, numLabels requested, expected labels, failure message)
    cases = [
        (mixedInference, 1, [2], "Output should be label 2."),
        (mixedInference, 2, [2, 0], "Output should be labels 2 and 0."),
        # Asking for 5 labels must still return only the nonzero ones.
        (sparseInference, 5, [2, 0], "Output should be labels 2 and 0."),
    ]

    for inferenceResult, numLabels, expected, message in cases:
        topLabels = model.getWinningLabels(inferenceResult,
                                           numLabels=numLabels)
        self.assertTrue(numpy.allclose(topLabels, numpy.array(expected)),
                        message)
def testCalculateAccuracyMultipleSamples(self):
    """
    Tests the calculateAccuracy() method of the classification model base
    class for three test samples.
    """
    predictedLabels = [numpy.array([0]), [None], numpy.array([1, 2, 0])]
    actualLabels = [
        numpy.array([0]),
        numpy.array([0, 2]),
        numpy.array([0, 1, 2]),
    ]

    # calculateAccuracy receives the predictions first, then the actual labels.
    classifications = [predictedLabels, actualLabels]
    result = ClassificationModel().calculateAccuracy(classifications)

    self.assertAlmostEqual(result, 2.0 / 3)
def testCalculateAccuracyMixedSamples(self):
    """
    Tests the calculateAccuracy() method of the classification model base
    class for a single sample under fully matching, partially matching and
    missing predictions.
    """
    model = ClassificationModel()
    actualLabels = [numpy.array([0, 1, 2])]

    # (predicted labels, expected accuracy)
    cases = [
        ([numpy.array([1, 2, 0])], 1.0),
        ([numpy.array([1])], float(1) / 3),
        ([None], 0.0),
    ]

    for predictedLabels, expectedAccuracy in cases:
        classifications = [predictedLabels, actualLabels]
        self.assertAlmostEqual(model.calculateAccuracy(classifications),
                               expectedAccuracy)
def testCalculateAccuracyMixedSamples(self):
    """
    Tests the calculateAccuracy() method of the classification model base
    class for test samples with mixed classifications.
    """
    model = ClassificationModel()
    actual = [numpy.array([0, 1, 2])]

    # Same labels in a different order.
    allPredicted = [numpy.array([1, 2, 0])]
    self.assertAlmostEqual(model.calculateAccuracy([allPredicted, actual]),
                           1.0)

    # Only one of the three actual labels predicted.
    onePredicted = [numpy.array([1])]
    self.assertAlmostEqual(model.calculateAccuracy([onePredicted, actual]),
                           1.0 / 3)

    # No prediction for the sample.
    nonePredicted = [None]
    self.assertAlmostEqual(model.calculateAccuracy([nonePredicted, actual]),
                           0.0)
def testWinningLabels(self):
    """
    Tests whether the classification base class returns multiple labels
    correctly.
    """
    model = ClassificationModel()

    def assertWinners(inference, numLabels, expected, message):
        # Compare the model's winning labels against the expected indices.
        winners = model.getWinningLabels(inference, numLabels=numLabels)
        self.assertTrue(numpy.allclose(winners, numpy.array(expected)),
                        message)

    inferenceResult = numpy.array([3, 1, 4, 0, 1, 0])
    assertWinners(inferenceResult, 1, [2], "Output should be label 2.")
    assertWinners(inferenceResult, 2, [2, 0],
                  "Output should be labels 2 and 0.")

    # Test only nonzero labels are returned.
    inferenceResult = numpy.array([3, 0, 4, 0, 0, 0])
    assertWinners(inferenceResult, 5, [2, 0],
                  "Output should be labels 2 and 0.")
def executeModelLifecycle(args, trainingData, labelRefs):
    """
    Run the model lifecycle: instantiate a model, train it, save it to
    args.modelDir, and load it back.

    @param args (argparse) Arguments used in classification model API
        experiments.
    @param trainingData (dict) Keys are document numbers, values are
        three-tuples of the document (str), labels (list), and document ID
        (int).
    @param labelRefs (list) Label names (str) corresponding to label indices.

    @return (two-tuple) Original and new models.
    """
    trainedModel = trainModel(instantiateModel(args), trainingData, labelRefs,
                              args.verbosity)
    trainedModel.save(args.modelDir)
    reloadedModel = ClassificationModel.load(args.modelDir)
    return trainedModel, reloadedModel
def executeModelLifecycle(args, trainingData, labelRefs):
    """
    Execute the model lifecycle: create a model, train it, save it, reload it.

    @param args (argparse) Arguments used in classification model API
        experiments.
    @param trainingData (dict) Keys are document numbers, values are
        three-tuples of the document (str), labels (list), and document ID
        (int).
    @param labelRefs (list) Label names (str) corresponding to label indices.

    @return (two-tuple) Original and new models.
    """
    original = instantiateModel(args)
    original = trainModel(original, trainingData, labelRefs, args.verbosity)

    # Persist to disk, then read an independent copy back from the same place.
    saveDir = args.modelDir
    original.save(saveDir)
    restored = ClassificationModel.load(args.modelDir)

    return (original, restored)
def runExperiment(args):
    """
    Load a previously saved model from args.modelDir and run analyzeModel on
    it with the document text map read from the data set.

    No model is created or trained here; a saved model must already exist.

    @return The loaded model.
    """
    # Second argument is passed straight to readDataAndReshuffle (presumably a
    # category ordering -- confirm against that helper). Only the fourth
    # returned item is used below.
    _, _, _, documentTextMap = readDataAndReshuffle(
        args, [8, 9, 10, 5, 6, 11, 13, 0, 1, 2, 3, 4, 7, 12, 14])

    loadedModel = ClassificationModel.load(args.modelDir)
    analyzeModel(args, loadedModel, documentTextMap)
    return loadedModel
def runExperiment(args, trainingData, testData):
    """
    Create a model according to args, train it on trainingData and test it on
    testData; then save it, restore it, and test the restored model on the
    same testData.
    """
    model = trainModel(args, createModel(args), trainingData)
    testModel(args, model, testData)

    # Test serialization - should give same result as above.
    model.save(args.modelDir)
    restoredModel = ClassificationModel.load(args.modelDir)
    print("")
    print("==========================Testing after de-serialization========")
    testModel(args, restoredModel, testData)
def runExperiment(args, trainingData, testData):
    """
    Train and test a model built from args, then repeat the test on a copy
    that has been saved to args.modelDir and loaded back.
    """
    freshModel = createModel(args)
    trainedModel = trainModel(args, freshModel, trainingData)
    testModel(args, trainedModel, testData)

    # Round-trip through disk; the second test run should match the first.
    trainedModel.save(args.modelDir)
    reloaded = ClassificationModel.load(args.modelDir)

    banner = "==========================Testing after de-serialization========"
    print("")
    print(banner)
    testModel(args, reloaded, testData)
def setupExperiment(args):
    """
    Create a model according to args, train it on the full data set, save it,
    and restore it.

    Sets args.numLabels to the number of label references as a side effect.

    @return newModel (ClassificationModel) The restored NLP model.
    @return dataSet (list) Each item is a list representing a data sample,
        with the text string, list of label indices, and the sample ID.
    """
    shuffled = readDataAndReshuffle(args)
    dataSet, labelRefs, _, _ = shuffled
    args.numLabels = len(labelRefs)

    # Create a model, train it, save it, reload it.
    trained = trainModel(instantiateModel(args), dataSet, labelRefs,
                         args.verbosity)
    trained.save(args.modelDir)

    return ClassificationModel.load(args.modelDir), dataSet
def setupExperiment(args):
    """
    Build, train, save and reload a model as described by args.

    args.numLabels is overwritten with the number of label references before
    the model is instantiated.

    @return newModel (ClassificationModel) The restored NLP model.
    @return dataSet (list) Each item is a list representing a data sample,
        with the text string, list of label indices, and the sample ID.
    """
    dataSet, labelRefs, _, _ = readDataAndReshuffle(args)
    numLabels = len(labelRefs)
    args.numLabels = numLabels

    model = instantiateModel(args)
    model = trainModel(model, dataSet, labelRefs, args.verbosity)

    # Persist the trained model and hand back the copy read from disk.
    model.save(args.modelDir)
    restoredModel = ClassificationModel.load(args.modelDir)

    return (restoredModel, dataSet)
def runExperiment(args):
    """
    Create a model according to args, train it, save it, restore it, test the
    restored model on the training data, and print profile information.

    @return The originally trained model (not the restored copy).
    """
    # The fourth returned item (a document text map) is not needed here.
    trainingData, labelRefs, documentCategoryMap, _ = readDataAndReshuffle(
        args, [8, 9, 10, 5, 6, 11, 13, 0, 1, 2, 3, 4, 7, 12, 14])

    # Create, train and persist the model.
    model = trainModel(args, instantiateModel(args), trainingData, labelRefs)
    model.save(args.modelDir)

    # Evaluate the copy read back from disk.
    restoredModel = ClassificationModel.load(args.modelDir)
    testModel(args, restoredModel, trainingData, labelRefs,
              documentCategoryMap)

    # Print profile information.
    # NOTE(review): the profile comes from the original model, not the
    # restored one that was just tested -- confirm this is intended.
    print(model.dumpProfile())

    return model
def runExperiment(args):
    """
    Train a model built from args, round-trip it through args.modelDir, test
    the reloaded copy on the training data, then print the trained model's
    profile.

    @return The originally trained model (not the reloaded copy).
    """
    shuffled = readDataAndReshuffle(
        args, [8, 9, 10, 5, 6, 11, 13, 0, 1, 2, 3, 4, 7, 12, 14])
    trainingData, labelRefs, documentCategoryMap, _ = shuffled

    trainedModel = instantiateModel(args)
    trainedModel = trainModel(args, trainedModel, trainingData, labelRefs)
    trainedModel.save(args.modelDir)

    reloadedModel = ClassificationModel.load(args.modelDir)
    testModel(args, reloadedModel, trainingData, labelRefs,
              documentCategoryMap)

    # Profile information is taken from the in-memory trained model.
    # NOTE(review): testing ran on the reloaded copy, so this profile may not
    # include the test pass -- confirm which model should be profiled.
    profileReport = trainedModel.dumpProfile()
    print(profileReport)

    return trainedModel