def learn(self, inputPattern, inputCategory, partitionId=None, isSparse=0,
          rowID=None):
    """
    Train the classifier to associate specified input pattern with a
    particular category.

    :param inputPattern: (list) The pattern to be assigned a category. If
        isSparse is 0, this should be a dense array (both ON and OFF bits
        present). Otherwise, if isSparse > 0, this should be a list of the
        indices of the non-zero bits in sorted order

    :param inputCategory: (int) The category to be associated to the
        training pattern

    :param partitionId: (int) partitionID allows you to associate an id with
        each input vector. It can be used to associate input patterns stored
        in the classifier with an external id. This can be useful for
        debugging or visualizing. Another use case is to ignore vectors with
        a specific id during inference (see description of infer() for
        details). There can be at most one partitionId per stored pattern
        (i.e. if two patterns are within distThreshold, only the first
        partitionId will be stored). This is an optional parameter.

    :param isSparse: (int) If 0, the input pattern is a dense representation.
        If isSparse > 0, the input pattern is a list of non-zero indices and
        isSparse is the length of the dense representation

    :param rowID: (int) Recency stamp used only when ``self.fixedCapacity``
        is set: it is written to ``self._categoryRecencyList`` for the row
        that is added (or for the existing row that the input matched), and
        the row with the smallest stamp is evicted once the number of stored
        patterns exceeds ``self.maxStoredPatterns``. Defaults to
        ``self._iterationIdx``.

    :returns: The number of patterns currently stored in the classifier

    :raises AssertionError: if a sparse ``inputPattern`` is not sorted or
        indexes outside ``isSparse``, or if dense memory is used with
        ``cellsPerCol != 0``.
    """
    if self.verbosity >= 1:
        print("%s learn:" % g_debugPrefix)
        print(" category:", int(inputCategory))
        print(" active inputs:", _labeledInput(inputPattern,
                                               cellsPerCol=self.cellsPerCol))

    if isSparse > 0:
        assert all(inputPattern[i] <= inputPattern[i+1]
                   for i in range(len(inputPattern)-1)), \
            "Sparse inputPattern must be sorted."
        assert all(bit < isSparse for bit in inputPattern), \
            ("Sparse inputPattern must not index outside the dense "
             "representation's bounds.")

    if rowID is None:
        rowID = self._iterationIdx

    # Dense vectors
    if not self.useSparseMemory:

        # Not supported
        assert self.cellsPerCol == 0, "not implemented for dense vectors"

        # If the input was given in sparse form, convert it to dense
        if isSparse > 0:
            denseInput = numpy.zeros(isSparse)
            denseInput[inputPattern] = 1.0
            inputPattern = denseInput

        if self._specificIndexTraining and not self._nextTrainingIndices:
            # Specific index mode without any index provided - skip training
            return self._numPatterns

        if self._Memory is None:
            # Initialize memory with 100 rows and numPatterns = 0
            inputWidth = len(inputPattern)
            self._Memory = numpy.zeros((100, inputWidth))
            self._numPatterns = 0
            self._M = self._Memory[:self._numPatterns]

        addRow = True

        if self._vt is not None:
            # Compute projection
            inputPattern = numpy.dot(self._vt, inputPattern - self._mean)

        if self.distThreshold > 0:
            # Check if input is too close to an existing input to be accepted
            dist = self._calcDistance(inputPattern)
            minDist = dist.min()
            addRow = (minDist >= self.distThreshold)

        if addRow:
            self._protoSizes = None     # need to re-compute
            if self._numPatterns == self._Memory.shape[0]:
                # Double the size of the memory
                self._doubleMemoryNumRows()

            if not self._specificIndexTraining:
                # Normal learning - append the new input vector
                self._Memory[self._numPatterns] = inputPattern
                self._numPatterns += 1
                self._categoryList.append(int(inputCategory))
            else:
                # Specific index training mode - insert vector in specified
                # slot
                vectorIndex = self._nextTrainingIndices.pop(0)
                while vectorIndex >= self._Memory.shape[0]:
                    self._doubleMemoryNumRows()
                self._Memory[vectorIndex] = inputPattern
                self._numPatterns = max(self._numPatterns, vectorIndex + 1)
                if vectorIndex >= len(self._categoryList):
                    # Pad with -1 so the category list reaches the new slot
                    self._categoryList += [-1] * (
                        vectorIndex - len(self._categoryList) + 1)
                self._categoryList[vectorIndex] = int(inputCategory)

            # Set _M to the "active" part of _Memory
            self._M = self._Memory[0:self._numPatterns]

            self._addPartitionId(self._numPatterns-1, partitionId)

    # Sparse vectors
    else:

        # If the input was given in sparse form, convert it to dense if
        # necessary. Every feature listed here operates on the dense
        # (thresholded) vector. cellsPerCol and replaceDuplicates are
        # included because the bursting-column logic and the duplicate check
        # below both read thresholdedInput, which is only built for dense
        # input; without them a sparse-form input hit an unbound local.
        if isSparse > 0 and (self._vt is not None
                             or self.distThreshold > 0
                             or self.numSVDDims is not None
                             or self.numSVDSamples is not None
                             or self.numWinners > 0
                             or self.cellsPerCol >= 1
                             or self.replaceDuplicates):
            denseInput = numpy.zeros(isSparse)
            denseInput[inputPattern] = 1.0
            inputPattern = denseInput
            isSparse = 0

        # Get the input width
        if isSparse > 0:
            inputWidth = isSparse
        else:
            inputWidth = len(inputPattern)

        # Allocate storage if this is the first training vector
        if self._Memory is None:
            self._Memory = NearestNeighbor(0, inputWidth)

        # Support SVD if it is on
        if self._vt is not None:
            inputPattern = numpy.dot(self._vt, inputPattern - self._mean)

        # Threshold the input, zeroing out entries that are too close to 0.
        # This is only done if we are given a dense input.
        if isSparse == 0:
            thresholdedInput = self._sparsifyVector(inputPattern, True)
        addRow = True

        # If given the layout of the cells, then turn on the logic that
        # stores only the start cell for bursting columns.
        if self.cellsPerCol >= 1:
            # A column whose minimum over its cells is non-zero has every
            # cell active, i.e. it is bursting.
            burstingCols = thresholdedInput.reshape(
                -1, self.cellsPerCol).min(axis=1).nonzero()[0]
            for col in burstingCols:
                thresholdedInput[(col * self.cellsPerCol) + 1:
                                 (col * self.cellsPerCol)
                                 + self.cellsPerCol] = 0

        # Don't learn entries that are too close to existing entries.
        if self._Memory.nRows() > 0:
            dist = None
            # if this vector is a perfect match for one we already learned,
            # then replace the category - it may have changed with online
            # learning on.
            if self.replaceDuplicates:
                dist = self._calcDistance(thresholdedInput, distanceNorm=1)
                if dist.min() == 0:
                    rowIdx = dist.argmin()
                    self._categoryList[rowIdx] = int(inputCategory)
                    if self.fixedCapacity:
                        self._categoryRecencyList[rowIdx] = rowID
                    addRow = False

            # Don't add this vector if it matches closely with another we
            # already added
            if self.distThreshold > 0:
                # The distances computed above used norm 1; reuse them only
                # when that is also the configured norm.
                if dist is None or self.distanceNorm != 1:
                    dist = self._calcDistance(thresholdedInput)
                minDist = dist.min()
                addRow = (minDist >= self.distThreshold)
                if not addRow:
                    if self.fixedCapacity:
                        rowIdx = dist.argmin()
                        self._categoryRecencyList[rowIdx] = rowID

        # If sparsity is too low, we do not want to add this vector
        if addRow and self.minSparsity > 0.0:
            if isSparse == 0:
                sparsity = (float(len(thresholdedInput.nonzero()[0])) /
                            len(thresholdedInput))
            else:
                sparsity = float(len(inputPattern)) / isSparse
            if sparsity < self.minSparsity:
                addRow = False

        # Add the new sparse vector to our storage
        if addRow:
            self._protoSizes = None     # need to re-compute
            if isSparse == 0:
                self._Memory.addRow(thresholdedInput)
            else:
                self._Memory.addRowNZ(inputPattern, [1]*len(inputPattern))
            self._numPatterns += 1
            self._categoryList.append(int(inputCategory))
            self._addPartitionId(self._numPatterns-1, partitionId)
            if self.fixedCapacity:
                self._categoryRecencyList.append(rowID)
                if self._numPatterns > self.maxStoredPatterns and \
                        self.maxStoredPatterns > 0:
                    # Over capacity: evict the row with the smallest
                    # recency stamp.
                    leastRecentlyUsedPattern = numpy.argmin(
                        self._categoryRecencyList)
                    self._Memory.deleteRow(leastRecentlyUsedPattern)
                    self._categoryList.pop(leastRecentlyUsedPattern)
                    self._categoryRecencyList.pop(leastRecentlyUsedPattern)
                    self._numPatterns -= 1

    if self.numSVDDims is not None and self.numSVDSamples is not None \
            and self._numPatterns == self.numSVDSamples:
        self.computeSVD()

    return self._numPatterns
def learn(self, inputPattern, inputCategory, partitionId=None, isSparse=0,
          rowID=None):
    """
    Learn a new training presentation.

    :param inputPattern: training pattern to learn. This should be a dense
        array if isSparse==0 or a list of non-zero indices if isSparse>0

    :param inputCategory: category index of the training pattern; stored as
        ``int(inputCategory)``.

    :param partitionId: must be None; any other value trips an assertion.
        If assertions are disabled, a non-None value is appended to
        ``self._partitionIdList`` whenever a pattern is stored.

    :param isSparse: If >0, the input pattern is a list of non-zero indices
        and isSparse is the length of the dense representation.

    :param rowID: Recency stamp used only when ``self.fixedCapacity`` is
        set: it is written to ``self._categoryRecencyList`` for the row that
        is added (or for the existing row that the input matched), and the
        row with the smallest stamp is evicted once the number of stored
        patterns exceeds ``self.maxStoredPatterns``. Defaults to
        ``self._iterationIdx``.

    :returns: The number of patterns currently stored in the classifier.

    :raises AssertionError: if ``partitionId`` is not None, or if dense
        memory is used with ``cellsPerCol != 0``.
    """
    if self.verbosity >= 1:
        # Single-argument print calls emit the same text under Python 2's
        # print statement and Python 3's print function.
        print("%s learn:" % (g_debugPrefix))
        print(" category: %s" % int(inputCategory))
        print(" active inputs: %s" % (
            _labeledInput(inputPattern, cellsPerCol=self.cellsPerCol),))

    if rowID is None:
        rowID = self._iterationIdx

    assert partitionId is None, \
        "No documentation is available for partitionId, not sure how it works."

    # Dense vectors
    if not self.useSparseMemory:

        # Not supported
        assert self.cellsPerCol == 0, "not implemented for dense vectors"

        # If the input was given in sparse form, convert it to dense
        if isSparse > 0:
            denseInput = numpy.zeros(isSparse)
            denseInput[inputPattern] = 1.0
            inputPattern = denseInput

        if self._specificIndexTraining and not self._nextTrainingIndices:
            # Specific index mode without any index provided - skip training
            return self._numPatterns

        if self._Memory is None:
            # Initialize memory with 100 rows and numPatterns = 0
            inputWidth = len(inputPattern)
            self._Memory = numpy.zeros((100, inputWidth))
            self._numPatterns = 0
            self._M = self._Memory[:self._numPatterns]

        addRow = True

        if self._vt is not None:
            # Compute projection
            inputPattern = numpy.dot(self._vt, inputPattern - self._mean)

        if self.distThreshold > 0:
            # Check if input is too close to an existing input to be accepted
            dist = self._calcDistance(inputPattern)
            minDist = dist.min()
            addRow = (minDist >= self.distThreshold)

        if addRow:
            self._protoSizes = None     # need to re-compute
            if self._numPatterns == self._Memory.shape[0]:
                # Double the size of the memory
                self._doubleMemoryNumRows()

            if not self._specificIndexTraining:
                # Normal learning - append the new input vector
                self._Memory[self._numPatterns] = inputPattern
                self._numPatterns += 1
                self._categoryList.append(int(inputCategory))
            else:
                # Specific index training mode - insert vector in specified
                # slot
                vectorIndex = self._nextTrainingIndices.pop(0)
                while vectorIndex >= self._Memory.shape[0]:
                    self._doubleMemoryNumRows()
                self._Memory[vectorIndex] = inputPattern
                self._numPatterns = max(self._numPatterns, vectorIndex + 1)
                if vectorIndex >= len(self._categoryList):
                    # Pad with -1 so the category list reaches the new slot
                    self._categoryList += [-1] * (
                        vectorIndex - len(self._categoryList) + 1)
                self._categoryList[vectorIndex] = int(inputCategory)

            # Set _M to the "active" part of _Memory
            self._M = self._Memory[0:self._numPatterns]

            if partitionId is not None:
                self._partitionIdList.append(partitionId)

    # Sparse vectors
    else:

        # If the input was given in sparse form, convert it to dense if
        # necessary. Every feature listed here operates on the dense
        # (thresholded) vector. cellsPerCol and replaceDuplicates are
        # included because the bursting-column logic and the duplicate check
        # below both read thresholdedInput, which is only built for dense
        # input; without them a sparse-form input hit an unbound local.
        if isSparse > 0 and (self._vt is not None
                             or self.distThreshold > 0
                             or self.numSVDDims is not None
                             or self.numSVDSamples is not None
                             or self.numWinners > 0
                             or self.cellsPerCol >= 1
                             or self.replaceDuplicates):
            denseInput = numpy.zeros(isSparse)
            denseInput[inputPattern] = 1.0
            inputPattern = denseInput
            isSparse = 0

        # Get the input width
        if isSparse > 0:
            inputWidth = isSparse
        else:
            inputWidth = len(inputPattern)

        # Allocate storage if this is the first training vector
        if self._Memory is None:
            self._Memory = NearestNeighbor(0, inputWidth)

        # Support SVD if it is on
        if self._vt is not None:
            inputPattern = numpy.dot(self._vt, inputPattern - self._mean)

        # Threshold the input, zeroing out entries that are too close to 0.
        # This is only done if we are given a dense input.
        if isSparse == 0:
            thresholdedInput = self._sparsifyVector(inputPattern, True)
        addRow = True

        # If given the layout of the cells, then turn on the logic that
        # stores only the start cell for bursting columns.
        if self.cellsPerCol >= 1:
            # A column whose minimum over its cells is non-zero has every
            # cell active, i.e. it is bursting.
            burstingCols = thresholdedInput.reshape(
                -1, self.cellsPerCol).min(axis=1).nonzero()[0]
            for col in burstingCols:
                thresholdedInput[(col * self.cellsPerCol) + 1:
                                 (col * self.cellsPerCol)
                                 + self.cellsPerCol] = 0

        # Don't learn entries that are too close to existing entries.
        if self._Memory.nRows() > 0:
            dist = None
            # if this vector is a perfect match for one we already learned,
            # then replace the category - it may have changed with online
            # learning on.
            if self.replaceDuplicates:
                dist = self._calcDistance(thresholdedInput, distanceNorm=1)
                if dist.min() == 0:
                    rowIdx = dist.argmin()
                    self._categoryList[rowIdx] = int(inputCategory)
                    if self.fixedCapacity:
                        self._categoryRecencyList[rowIdx] = rowID
                    addRow = False

            # Don't add this vector if it matches closely with another we
            # already added
            if self.distThreshold > 0:
                # The distances computed above used norm 1; reuse them only
                # when that is also the configured norm.
                if dist is None or self.distanceNorm != 1:
                    dist = self._calcDistance(thresholdedInput)
                minDist = dist.min()
                addRow = (minDist >= self.distThreshold)
                if not addRow:
                    if self.fixedCapacity:
                        rowIdx = dist.argmin()
                        self._categoryRecencyList[rowIdx] = rowID

        # Add the new vector to our storage
        if addRow:
            self._protoSizes = None     # need to re-compute
            if isSparse == 0:
                self._Memory.addRow(thresholdedInput)
            else:
                self._Memory.addRowNZ(inputPattern, [1] * len(inputPattern))
            self._numPatterns += 1
            self._categoryList.append(int(inputCategory))
            if partitionId is not None:
                self._partitionIdList.append(partitionId)
            if self.fixedCapacity:
                self._categoryRecencyList.append(rowID)
                if self._numPatterns > self.maxStoredPatterns and \
                        self.maxStoredPatterns > 0:
                    # Over capacity: evict the row with the smallest
                    # recency stamp.
                    leastRecentlyUsedPattern = numpy.argmin(
                        self._categoryRecencyList)
                    self._Memory.deleteRow(leastRecentlyUsedPattern)
                    self._categoryList.pop(leastRecentlyUsedPattern)
                    self._categoryRecencyList.pop(leastRecentlyUsedPattern)
                    self._numPatterns -= 1

    if self.numSVDDims is not None and self.numSVDSamples is not None \
            and self._numPatterns == self.numSVDSamples:
        self.computeSVD()

    return self._numPatterns
def read(cls, proto):
    """
    Build an instance of ``cls`` from the serialized message ``proto``.

    The instance is allocated with ``object.__new__`` (so ``__init__`` is
    not run), the public configuration fields are copied from ``proto``,
    ``clear()`` resets the private state, and that state is then filled in
    from whichever optional ``proto`` fields are not None.

    :param proto: message object exposing the attributes read below;
        ``proto.memory`` must provide ``which()`` returning the name of the
        stored memory variant.

    :returns: the reconstructed instance.

    :raises RuntimeError: if ``proto.version`` differs from
        ``KNNCLASSIFIER_VERSION``.
    """
    if proto.version != KNNCLASSIFIER_VERSION:
        raise RuntimeError("Invalid KNNClassifier Version")

    instance = object.__new__(cls)

    # Public configuration: same attribute name on both sides, copied in a
    # fixed order.
    configFields = (
        "version", "k", "exact", "distanceNorm", "distanceMethod",
        "distThreshold", "doBinarization", "binarizationThreshold",
        "useSparseMemory", "sparseThreshold", "relativeThreshold",
        "numWinners", "numSVDSamples", "numSVDDims", "fractionOfMax",
        "verbosity", "maxStoredPatterns", "replaceDuplicates",
        "cellsPerCol", "minSparsity",
    )
    for fieldName in configFields:
        setattr(instance, fieldName, getattr(proto, fieldName))

    instance._adaptiveSVDDims = bool(instance.numSVDDims == "adaptive")

    # Read private state; clear() runs first so anything the message does
    # not carry keeps the value clear() gives it.
    instance.clear()

    if proto.memory is not None:
        memoryKind = proto.memory.which()
        if memoryKind == "ndarray":
            instance._Memory = numpy.array(proto.memory.ndarray,
                                           dtype=numpy.float64)
        elif memoryKind == "nearestNeighbor":
            instance._Memory = NearestNeighbor()
            instance._Memory.read(proto.memory.nearestNeighbor)

    instance._numPatterns = proto.numPatterns

    if proto.m is not None:
        instance._M = numpy.array(proto.m, dtype=numpy.float64)

    if proto.categoryList is not None:
        instance._categoryList = list(proto.categoryList)

    if proto.partitionIdList is not None:
        instance._partitionIdList = list(proto.partitionIdList)
        instance._rebuildPartitionIdMap(instance._partitionIdList)

    instance._iterationIdx = proto.iterationIdx
    instance._finishedLearning = proto.finishedLearning

    # The three optional float32 arrays (s, vt, mean) map onto the private
    # attributes of the same name.
    for protoField, privateName in (("s", "_s"), ("vt", "_vt"),
                                    ("mean", "_mean")):
        values = getattr(proto, protoField)
        if values is not None:
            setattr(instance, privateName,
                    numpy.array(values, dtype=numpy.float32))

    return instance