def testModelSaveAndLoad(self):
        """Check that a saved and reloaded model classifies like the original.

        A keywords model is trained on five samples, tested, and saved to
        self.modelDir. A model loaded back from that directory is then tested
        on the same sample indices and its outputs are compared, sample by
        sample, with those of the original model.
        """
        # Keywords model uses the base class implementations of save/load methods.
        self.modelDir = "poke_model"
        model = ClassificationModelKeywords(modelDir=self.modelDir, verbosity=0)

        # Keys are the sample indices 0..4 that the loops below hand to
        # trainModel() and testModel().
        samples = {
            0: (["Pickachu"], numpy.array([0, 2, 2])),
            1: (["Eevee"], numpy.array([2])),
            2: (["Charmander"], numpy.array([0, 1, 1])),
            3: (["Abra"], numpy.array([1])),
            4: (["Squirtle"], numpy.array([1, 0, 1])),
        }

        patterns = model.encodeSamples(samples)
        for i in xrange(len(samples)):
            model.trainModel(i)

        # Reference output, taken before the model is written to disk.
        output = [model.testModel(i) for i in xrange(len(patterns))]

        model.saveModel()

        loadedModel = ClassificationModel(verbosity=0).loadModel(self.modelDir)
        loadedModelOutput = [loadedModel.testModel(i) for i in xrange(len(patterns))]

        for mClasses, lClasses in zip(output, loadedModelOutput):
            self.assertSequenceEqual(
                mClasses.tolist(),
                lClasses.tolist(),
                "Output classifications from loaded model don't match original model's.",
            )
示例#2
0
  def createModel(self, modelName, loadPath, savePath, *modelFactoryArgs,
      **modelFactoryKwargs):
    """ Loads a model from loadPath or, when no load path was given and loading
    fails, builds and trains a new one.

    ClassificationModel.load(loadPath) is always attempted first. What happens
    when it raises IOError depends on loadPath:
      - truthy loadPath: the IOError is wrapped in ImbuUnableToLoadModelError
        and raised; no new model is created.
      - falsy loadPath: a model is built with self._modelFactory(modelName,
        savePath, *modelFactoryArgs, **modelFactoryKwargs) and handed to
        self.train(model, savePath).

    @return The loaded or newly trained model.
    """
    # The model name must be an identifier defined in the model factory mapping.
    # The lookup raises AttributeError when the mapped name is not an attribute
    # of ClassificationModelTypes; the looked-up value is not used below.
    modelType = getattr(ClassificationModelTypes, self._mapModelName(modelName))

    try:
      model = ClassificationModel.load(loadPath)
    except IOError as loadError:
      if loadPath:
        # An explicit load path was given, so a model was expected to exist
        # there. DO NOT fall back to creating a new one.
        raise ImbuUnableToLoadModelError(loadError)

      # No load path was given: gracefully create and train a new model.
      model = self._modelFactory(modelName,
                                 savePath,
                                 *modelFactoryArgs,
                                 **modelFactoryKwargs)
      self.train(model, savePath)

    return model
    def testNoWinningLabels(self):
        """An all-zero inference result over 4 classes yields no winning labels."""
        allZeroInference = numpy.array([0, 0, 0, 0])

        winners = ClassificationModel().getWinningLabels(allZeroInference)

        # An empty result is falsy.
        self.assertFalse(winners)
    def testCalculateAccuracyMultipleSamples(self):
        """
        Checks calculateAccuracy() of the classification model base class on
        three test samples; the expected accuracy is 2/3.
        """
        expected = [numpy.array([0]), numpy.array([0, 2]), numpy.array([0, 1, 2])]
        # The second sample carries no prediction ([None]).
        predicted = [numpy.array([0]), [None], numpy.array([1, 2, 0])]

        # calculateAccuracy() is given [predictions, actual labels].
        accuracy = ClassificationModel().calculateAccuracy([predicted, expected])

        self.assertAlmostEqual(accuracy, float(2) / 3)
示例#5
0
def createModel(modelName, modelFactory):
  """Return an instantiated model."""

  global g_models

  modelDir = os.path.join(_MODEL_CACHE_DIR_PREFIX, modelName)

  try:
    print "Attempting to load from", modelDir
    model = ClassificationModel.loadModel(modelDir)
    modelProxy = SynchronousBackgroundModelProxy(model)
    print "Model loaded from", modelDir

  except IOError:
    print "Model failed to load from", modelDir, "Let's train it from scratch."


    if modelFactory is None:
      raise ValueError("Could not instantiate model '{}'.".format(modelName))

    if modelName == "HTMNetwork":

      raise NotImplementedError()

    elif modelName == "CioWordFingerprint":
      model = modelFactory(retina=os.environ["IMBU_RETINA_ID"],
                           apiKey=os.environ["CORTICAL_API_KEY"],
                           fingerprintType=EncoderTypes.word,
                           modelDir=modelDir,
                           cacheRoot=_MODEL_CACHE_DIR_PREFIX)

    elif modelName == "CioDocumentFingerprint":
      model = modelFactory(retina=os.environ["IMBU_RETINA_ID"],
                           apiKey=os.environ["CORTICAL_API_KEY"],
                           fingerprintType=EncoderTypes.document,
                           modelDir=modelDir,
                           cacheRoot=_MODEL_CACHE_DIR_PREFIX)

    else:
      model = modelFactory(modelDir=modelDir)

    model.verbosity = 0
    model.numLabels = 0

    modelProxy = SynchronousBackgroundModelProxy(model)

    samples = modelProxy.prepData(g_csvdata, False)

    modelProxy.encodeSamples(samples)

    for i in xrange(len(samples)):
      modelProxy.trainModel(i)

    print "Model trained, save it."

    modelProxy.saveModel()

    print "Model saved"

  g_models[modelName] = modelProxy
示例#6
0
def runExperiment(args):
  """
  Create model according to args, train on training data, save model,
  restore model, test on test data.
  """

  (dataSet, labelRefs, documentCategoryMap,
   documentTextMap) = readDataAndReshuffle(args)

  # Train only with documents whose id's are divisible by 100
  trainingData = [x for i,x in enumerate(dataSet) if x[2]%100==0]
  testData = [x for i,x in enumerate(dataSet) if x[2]%100!=0]

  print "Num training",len(trainingData),"num testing",len(testData)

  # Create model
  model = instantiateModel(args)

  model = trainModel(args, model, trainingData, labelRefs)
  model.save(args.modelDir)
  newmodel = ClassificationModel.load(args.modelDir)
  testModel(args, newmodel, trainingData, labelRefs, documentCategoryMap)
  testModel(args, newmodel, testData, labelRefs, documentCategoryMap)

  return model
def runExperiment(args):
    """
  Create model according to args, train on training data, save model,
  restore model, test on test data.
  """
    # Create model
    model = instantiateModel(args)

    # Train model on the first 80% of the dataset
    trainingSplit = int(len(_DATASET) * 0.80)
    model = trainModel(model, _DATASET[:trainingSplit])

    # Test model on the full dataset
    accuracyPct = testModel(model, _DATASET)

    # Validate serialization - testing after reloading should give same result
    model.save(args.modelDir)
    newModel = ClassificationModel.load(args.modelDir)
    print
    print "Testing serialization..."
    newAccuracyPct = testModel(newModel, _DATASET)
    if accuracyPct == newAccuracyPct:
        print "Serialization validated."
    else:
        print (
            "Inconsistent results before ({}) and after ({}) saving/loading "
            "the model!".format(accuracyPct, newAccuracyPct)
        )
def runExperiment(args):
    """
  Create model according to args, train on training data, save model,
  restore model, test on test data.
  """
    # Create model
    model = instantiateModel(args)

    # Train model on the first 80% of the dataset
    trainingSplit = int(len(_DATASET) * 0.80)
    model = trainModel(model, _DATASET[:trainingSplit])

    # Test model on the full dataset
    accuracyPct = testModel(model, _DATASET)

    # Validate serialization - testing after reloading should give same result
    model.save(args.modelDir)
    newModel = ClassificationModel.load(args.modelDir)
    print
    print "Testing serialization..."
    newAccuracyPct = testModel(newModel, _DATASET)
    if accuracyPct == newAccuracyPct:
        print "Serialization validated."
    else:
        print(
            "Inconsistent results before ({}) and after ({}) saving/loading "
            "the model!".format(accuracyPct, newAccuracyPct))
 def _executeModelLifecycle(self, modelName, modelDir):
   """ Build a model, train it on self.dataSet, save it to modelDir, and
   return a copy loaded back from modelDir.

   The model is built with createModel(modelName, **self.modelParams).
   """
   trained = trainModel(createModel(modelName, **self.modelParams),
                        self.dataSet)
   trained.save(modelDir)
   # Drop the local reference to the in-memory model before loading the copy.
   del trained
   return ClassificationModel.load(modelDir)
    def testWinningLabels(self):
        """
        Checks that the classification base class returns the requested number
        of winning labels, and only labels with a nonzero inference value.
        """
        model = ClassificationModel()

        denseInference = numpy.array([3, 1, 4, 0, 1, 0])
        sparseInference = numpy.array([3, 0, 4, 0, 0, 0])

        # (inference result, numLabels, expected labels, failure message)
        cases = (
            (denseInference, 1, [2], "Output should be label 2."),
            (denseInference, 2, [2, 0], "Output should be labels 2 and 0."),
            # Only two entries are nonzero, so asking for 5 still gives 2 labels.
            (sparseInference, 5, [2, 0], "Output should be labels 2 and 0."),
        )

        for inferenceResult, requested, expected, message in cases:
            topLabels = model.getWinningLabels(inferenceResult, numLabels=requested)
            self.assertTrue(numpy.allclose(topLabels, numpy.array(expected)), message)
    def testCalculateAccuracyMultipleSamples(self):
        """
        Exercises calculateAccuracy() of the classification model base class
        with three test samples and expects an accuracy of 2/3.
        """
        model = ClassificationModel()

        # One (predicted, actual) pair per sample; the middle sample has no
        # prediction ([None]).
        samplePairs = [
            (numpy.array([0]), numpy.array([0])),
            ([None], numpy.array([0, 2])),
            (numpy.array([1, 2, 0]), numpy.array([0, 1, 2])),
        ]
        predictedLabels = [predicted for predicted, _ in samplePairs]
        actualLabels = [actual for _, actual in samplePairs]

        result = model.calculateAccuracy([predictedLabels, actualLabels])

        self.assertAlmostEqual(result, float(2) / 3)
    def testCalculateAccuracyMixedSamples(self):
        """
        Checks calculateAccuracy() of the classification model base class for a
        single sample predicted fully, partially, and not at all.
        """
        model = ClassificationModel()
        actualLabels = [numpy.array([0, 1, 2])]

        # (predicted labels, expected accuracy)
        cases = (
            ([numpy.array([1, 2, 0])], 1.0),
            ([numpy.array([1])], float(1) / 3),
            ([None], 0.0),
        )

        for predictedLabels, expectedAccuracy in cases:
            accuracy = model.calculateAccuracy([predictedLabels, actualLabels])
            self.assertAlmostEqual(accuracy, expectedAccuracy)
    def testCalculateAccuracyMixedSamples(self):
        """
        Exercises calculateAccuracy() of the classification model base class
        with three different predictions for the same actual labels.
        """
        model = ClassificationModel()
        truth = [numpy.array([0, 1, 2])]

        def accuracyFor(predicted):
            # calculateAccuracy() is given [predictions, actual labels].
            return model.calculateAccuracy([predicted, truth])

        # Same labels in a different order.
        self.assertAlmostEqual(accuracyFor([numpy.array([1, 2, 0])]), 1.0)
        # One of the three labels.
        self.assertAlmostEqual(accuracyFor([numpy.array([1])]), float(1) / 3)
        # No prediction.
        self.assertAlmostEqual(accuracyFor([None]), 0.0)
    def testWinningLabels(self):
        """
        Verifies that getWinningLabels() of the classification base class
        returns multiple labels correctly.
        """
        model = ClassificationModel()
        twoAndZeroMessage = "Output should be labels 2 and 0."

        scores = numpy.array([3, 1, 4, 0, 1, 0])

        best = model.getWinningLabels(scores, numLabels=1)
        self.assertTrue(numpy.allclose(best, numpy.array([2])),
                        "Output should be label 2.")

        bestTwo = model.getWinningLabels(scores, numLabels=2)
        self.assertTrue(numpy.allclose(bestTwo, numpy.array([2, 0])),
                        twoAndZeroMessage)

        # Test only nonzero labels are returned: five labels are requested but
        # just two entries are nonzero.
        scores = numpy.array([3, 0, 4, 0, 0, 0])
        nonzeroOnly = model.getWinningLabels(scores, numLabels=5)
        self.assertTrue(numpy.allclose(nonzeroOnly, numpy.array([2, 0])),
                        twoAndZeroMessage)
示例#15
0
def executeModelLifecycle(args, trainingData, labelRefs):
    """ Run one model lifecycle: instantiate, train, save to args.modelDir,
    and load a second copy back from args.modelDir.

    @param args (argparse) Arguments used in classification model API experiments.
    @param trainingData (dict) Keys are document numbers, values are three-tuples
        of the document (str), labels (list), and document ID (int).
    @param labelRefs (list) Label names (str) corresponding to label indices.

    @return (two-tuple) The trained model and the reloaded model, in that order.
    """
    trainedModel = trainModel(
        instantiateModel(args), trainingData, labelRefs, args.verbosity)
    trainedModel.save(args.modelDir)
    return trainedModel, ClassificationModel.load(args.modelDir)
def executeModelLifecycle(args, trainingData, labelRefs):
  """ Create a model, train it, save it, and reload it from disk.

  @param args (argparse) Arguments used in classification model API experiments.
  @param trainingData (dict) Keys are document numbers, values are three-tuples
      of the document (str), labels (list), and document ID (int).
  @param labelRefs (list) Label names (str) corresponding to label indices.

  @return (two-tuple) Original model and the model loaded from args.modelDir.
  """
  original = instantiateModel(args)
  original = trainModel(original, trainingData, labelRefs, args.verbosity)

  # Round-trip the trained model through args.modelDir.
  original.save(args.modelDir)
  restored = ClassificationModel.load(args.modelDir)

  return original, restored
def runExperiment(args):
  """
  Load a previously saved model from args.modelDir and run analyzeModel() on it
  with the document text map returned by readDataAndReshuffle().

  @return The loaded model.
  """
  # Only the fourth element (the document text map) is used below.
  _, _, _, documentTextMap = readDataAndReshuffle(
      args, [8,9,10,5,6,11,13,0,1,2,3,4,7,12,14])

  loadedModel = ClassificationModel.load(args.modelDir)
  analyzeModel(args, loadedModel, documentTextMap)

  return loadedModel
def runExperiment(args, trainingData, testData):
  """
  Create model according to args, train on training data, save model,
  restore model, test on test data.
  """

  model = createModel(args)
  model = trainModel(args, model, trainingData)
  testModel(args, model, testData)

  # Test serialization - should give same result as above
  model.save(args.modelDir)
  newmodel = ClassificationModel.load(args.modelDir)
  print
  print "==========================Testing after de-serialization========"
  testModel(args, newmodel, testData)
def runExperiment(args, trainingData, testData):
    """
  Create model according to args, train on training data, save model,
  restore model, test on test data.
  """

    model = createModel(args)
    model = trainModel(args, model, trainingData)
    testModel(args, model, testData)

    # Test serialization - should give same result as above
    model.save(args.modelDir)
    newmodel = ClassificationModel.load(args.modelDir)
    print
    print "==========================Testing after de-serialization========"
    testModel(args, newmodel, testData)
def setupExperiment(args):
  """
  Read the data set, then create a model according to args, train it on the
  whole data set, save it to args.modelDir, and load it back.

  Sets args.numLabels to the number of label references as a side effect.

  @return newModel (ClassificationModel) The restored NLP model.
  @return dataSet (list) Each item is a list representing a data sample, with
      the text string, list of label indices, and the sample ID.
  """
  dataSet, labelRefs, _, _ = readDataAndReshuffle(args)
  # numLabels is set on args before the model is instantiated from args.
  args.numLabels = len(labelRefs)

  trained = trainModel(instantiateModel(args), dataSet, labelRefs,
                       args.verbosity)
  trained.save(args.modelDir)

  return ClassificationModel.load(args.modelDir), dataSet
示例#21
0
def setupExperiment(args):
    """
    Create a model according to args, train it on the data set, save it, and
    restore it from args.modelDir.

    args.numLabels is overwritten with len(labelRefs) before the model is
    instantiated.

    @return newModel (ClassificationModel) The restored NLP model.
    @return dataSet (list) Each item is a list representing a data sample, with
        the text string, list of label indices, and the sample ID.
    """
    dataSet, labelRefs, _, _ = readDataAndReshuffle(args)
    args.numLabels = len(labelRefs)

    # Lifecycle: instantiate -> train -> save -> reload.
    original = instantiateModel(args)
    original = trainModel(original, dataSet, labelRefs, args.verbosity)
    original.save(args.modelDir)
    restored = ClassificationModel.load(args.modelDir)

    return restored, dataSet
示例#22
0
def runExperiment(args):
    """
    Create a model according to args, train it, save it to args.modelDir, load
    it back, and test the loaded copy on the training data. Finally dump the
    profile of the originally trained model.

    @return The originally trained model (not the reloaded copy).
    """
    trainingData, labelRefs, documentCategoryMap, _ = readDataAndReshuffle(
        args, [8, 9, 10, 5, 6, 11, 13, 0, 1, 2, 3, 4, 7, 12, 14])

    trained = trainModel(args, instantiateModel(args), trainingData, labelRefs)
    trained.save(args.modelDir)

    # The reloaded copy is tested on the same data it was trained on.
    reloaded = ClassificationModel.load(args.modelDir)
    testModel(args, reloaded, trainingData, labelRefs, documentCategoryMap)

    # Print profile information (of the in-memory model, not the reloaded one)
    print
    trained.dumpProfile()

    return trained
def runExperiment(args):
  """
  Train a model built from args, round-trip it through args.modelDir, test the
  restored copy on the training data, and dump the trained model's profile.

  @return The trained model; the restored copy is only used for testing.
  """
  # The fourth returned value (document text map) is not needed here.
  (trainingData, labelRefs, documentCategoryMap,
   _) = readDataAndReshuffle(args,
                             [8,9,10,5,6,11,13,0,1,2,3,4,7,12,14])

  model = instantiateModel(args)
  model = trainModel(args, model, trainingData, labelRefs)

  # Save, restore, and test the restored model.
  model.save(args.modelDir)
  restoredModel = ClassificationModel.load(args.modelDir)
  testModel(args, restoredModel, trainingData, labelRefs, documentCategoryMap)

  # Print profile information
  print
  model.dumpProfile()

  return model