def instantiateModel(args):
  """
  Set some specific arguments and return an instance of the model we will use.
  """
  args.networkConfig = getNetworkConfig(args.networkConfigPath)
  # kValues is a module-level dict mapping model names to a known-good K
  # (e.g. {"keywords": 21, "docfp": 3} in Example #3); default to K = 1
  args.k = kValues.get(args.modelName, 1)
  return createModel(**vars(args))
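
A minimal usage sketch, assuming `args` comes from argparse and that `createModel` and `getNetworkConfig` live in `htmresearch.frameworks.nlp.model_factory` (the module named in Example #3's docstring); the flag names mirror the attributes read above, and the default config path is hypothetical:

from argparse import ArgumentParser

from htmresearch.frameworks.nlp.model_factory import (
  createModel, getNetworkConfig)

kValues = {"keywords": 21, "docfp": 3}  # known-good K per model type

parser = ArgumentParser()
parser.add_argument("--modelName", default="docfp")
parser.add_argument("--networkConfigPath",
                    default="network_configs/sensor_simple_TP_knn.json")

model = instantiateModel(parser.parse_args())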
Example #3
def _loadNetworkConfig(jsonName=None):
    """ Load network config by calculating path relative to this file, and load
    with htmresearch.frameworks.nlp.model_factory.getNetworkConfig()
    """
    if not jsonName:
        raise RuntimeError("Need a config file to build the network model.")

    # Climb four directory levels from this file to reach the repository root
    root = os.path.dirname(
        os.path.dirname(
            os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))

    return getNetworkConfig(
        os.path.join(root, "projects/nlp/data/network_configs", jsonName))
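
A brief usage sketch; the file name comes from Example #6 and is assumed to exist under projects/nlp/data/network_configs:

# Resolves to <repo root>/projects/nlp/data/network_configs/sensor_simple_TP_knn.json
networkConfig = _loadNetworkConfig("sensor_simple_TP_knn.json")
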
def instantiateModel(args):
    """
    Return an instance of the model we will use.
    """
    # Some values of K we know work well for this problem for specific model types
    kValues = {"keywords": 21, "docfp": 3}

    # Create model after setting specific arguments required for this experiment
    args.networkConfig = getNetworkConfig(args.networkConfigPath)
    args.numLabels = 2
    args.k = kValues.get(args.modelName, 1)

    return createModel(**vars(args))
Example #6
  def testSensorSimpleUPKNN(self):
    # Build model
    modelName = "htm"
    modelDir = os.path.join(self.modelDir, "htm.checkpoint")

    networkConfigPath = os.path.join(
      _ROOT, "projects/nlp/data/network_configs/sensor_simple_TP_knn.json")

    self.modelParams.update(
      networkConfig=getNetworkConfig(networkConfigPath),
      numLabels=2,
      modelDir=modelDir,
    )

    model = self._executeModelLifecycle(modelName, modelDir)

    # Test model inference
    self._validateInference(model, modelName)
    self._inferWithFirstDocument(model, modelName)
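
Outside the test harness, the equivalent model construction might look like this minimal sketch; `_ROOT` is the repository root as in the test above, `modelDir` is a hypothetical location, and any extra parameters carried in `self.modelParams` are omitted:

import os

from htmresearch.frameworks.nlp.model_factory import (
  createModel, getNetworkConfig)

networkConfigPath = os.path.join(
  _ROOT, "projects/nlp/data/network_configs/sensor_simple_TP_knn.json")

model = createModel(
  modelName="htm",
  networkConfig=getNetworkConfig(networkConfigPath),
  numLabels=2,
  modelDir="/tmp/htm.checkpoint",
)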
Example #7
def runExperiment(args):
  if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)
  
  (trainingDataDup, labelRefs, documentCategoryMap,
   documentTextMap) = readDataAndReshuffle(args)
  
  # remove duplicates from training data
  includedDocIds = set()
  trainingData = []
  for record in trainingDataDup:
    if record[2] not in includedDocIds:
      includedDocIds.add(record[2])
      trainingData.append(record)
  
  args.networkConfig = getNetworkConfig(args.networkConfigPath)
  model = createModel(numLabels=1, **vars(args))
  model = trainModel(args, model, trainingData, labelRefs)
  
  numDocs = model.getClassifier()._numPatterns
  
  print "Model trained with %d documents" % (numDocs,)
  
  knn = model.getClassifier()
  hc = HierarchicalClustering(knn)
  
  hc.cluster("complete")
  protos, clusterSizes = hc.getClusterPrototypes(args.numClusters,
                                                 numDocs)

  # Run test to ensure consistency with KNN
  if args.knnTest:
    knnTest(protos, knn)
    return


  # Summary statistics
  # bucketCounts[i, j] is the number of occurrences of bucket j in cluster i
  bucketCounts = numpy.zeros((args.numClusters, len(labelRefs)))  

  for clusterId in xrange(len(clusterSizes)):
    print
    print "Cluster %d with %d documents" % (clusterId, clusterSizes[clusterId])
    print "==============="

    prototypeNum = 0
    for index in protos[clusterId]:
      if index != -1:
        docId = trainingData[index][2]
        prototypeNum += 1
        display = prototypeNum <= args.numPrototypes

        if display:
          print "(%d) %s" % (docId, trainingData[index][0])
          print "Buckets:"

        # The docId keys in documentCategoryMap are strings rather than ints
        if docId in documentCategoryMap:
          for bucketId in documentCategoryMap[docId]:
            bucketCounts[clusterId, bucketId] += 1
            if display:
              print "    ", labelRefs[bucketId]
        elif display:
          print "    <None>"
        if display:
          print "\n\n"

  createBucketClusterPlot(args, bucketCounts)
  create2DSVDProjection(args, protos, trainingData, documentCategoryMap, knn)
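
The duplicate-removal loop near the top of runExperiment keeps only the first record seen for each document ID (record[2], per the docId lookup later in the function); a standalone, runnable sketch of the same pattern with toy records:

# Toy records shaped like (text, labels, docId), as the loop above assumes
records = [("doc a", [0], 7), ("doc b", [1], 9), ("doc a again", [0], 7)]

seen = set()
deduped = []
for record in records:
  if record[2] not in seen:
    seen.add(record[2])
    deduped.append(record)

assert [r[2] for r in deduped] == [7, 9]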