def init():
  global numColumns, numInputs, inputDimensions, columnDimensions, connectedSynapses
  inputDimensions = [32, 32]
  columnDimensions = [64, 64]
  initConnectedPct = 0.5

  # Check the matrix SM_01_32_32
  inputDimensions = numpy.array(inputDimensions, ndmin=1)
  columnDimensions = numpy.array(columnDimensions, ndmin=1)
  numColumns = columnDimensions.prod()
  numInputs = inputDimensions.prod()

  potentialPools = SparseBinaryMatrix(numInputs)
  potentialPools.resize(numColumns, numInputs)
  permanences = SparseMatrix(numColumns, numInputs)

  random = NupicRandom()
  tieBreaker = 0.01 * numpy.array([random.getReal64() for i in xrange(numColumns)])

  connectedSynapses = SparseBinaryMatrix(numInputs)
  connectedSynapses.resize(numColumns, numInputs)
  connectedCounts = numpy.zeros(numColumns, dtype=realDType)

  potentialPools.replaceSparseRow(0, numpy.array([0, 1], dtype='int'))
def __init__(self, width, seed):
  # Arbitrary value that's compatible with UInt32 in the proto schema
  # for testing serialization of python-native property
  self.width = width

  # For testing serialization of object implemented in the extension
  self.rand = Random(seed)
def testEquals(self):
  r1 = Random(42)
  v1 = r1.getReal64()

  r2 = Random(42)
  v2 = r2.getReal64()

  self.assertEqual(v1, v2)
  self.assertEqual(r1, r2)
def testNupicRandomPickling(self):
  """Test pickling / unpickling of NuPIC randomness."""

  # Simple test: make sure that dumping / loading works...
  r = Random(42)
  pickledR = pickle.dumps(r)

  test1 = [r.getUInt32() for _ in xrange(10)]
  r = pickle.loads(pickledR)
  test2 = [r.getUInt32() for _ in xrange(10)]

  self.assertEqual(test1, test2,
                   "Simple NuPIC random pickle/unpickle failed.")

  # A little trickier: dump / load _after_ some numbers have been generated
  # (in the first test). Things should still work...
  # ...the idea of this test is to make sure that the pickle code isn't just
  # saving the initial seed...
  pickledR = pickle.dumps(r)

  test3 = [r.getUInt32() for _ in xrange(10)]
  r = pickle.loads(pickledR)
  test4 = [r.getUInt32() for _ in xrange(10)]

  self.assertEqual(
      test3, test4,
      "NuPIC random pickle/unpickle didn't work for saving later state.")

  self.assertNotEqual(test1, test3,
                      "NuPIC random gave the same result twice?!?")
def testShuffleEmpty(self):
  r = Random(42)
  arr = numpy.zeros([0], dtype="uint32")

  r.shuffle(arr)

  self.assertEqual(arr.size, 0)
def main():
  """Measure serialization performance of Random"""
  r = Random(42)

  # Measure serialization
  startSerializationTime = time.time()
  for i in range(_SERIALIZATION_LOOPS):
    r.saveToFile("RandomSerialization.stream")
  elapsedSerializationTime = time.time() - startSerializationTime

  # Measure deserialization
  startDeserializationTime = time.time()
  for _ in range(_DESERIALIZATION_LOOPS):
    r.loadFromFile("RandomSerialization.stream")
  elapsedDeserializationTime = time.time() - startDeserializationTime

  # Print report
  print(_SERIALIZATION_LOOPS, "Serialization loops in",
        elapsedSerializationTime, "seconds.")
  print("\t", elapsedSerializationTime / _SERIALIZATION_LOOPS, "seconds per loop.")

  print(_DESERIALIZATION_LOOPS, "Deserialization loops in",
        elapsedDeserializationTime, "seconds.")
  print("\t", elapsedDeserializationTime / _DESERIALIZATION_LOOPS, "seconds per loop.")
def _seed(self, seed=-1):
  """
  Initialize the random seed
  """
  if seed != -1:
    self.random = NupicRandom(seed)
  else:
    self.random = NupicRandom()
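# Illustrative sketch (not from the original source): with a fixed seed the
# generator is reproducible, which is what the seeding helpers above rely on.
# Assumes NupicRandom is nupic.bindings.Random, as used in the snippet above.
def _seedReproducibilityDemo():
  seeded1 = NupicRandom(42)
  seeded2 = NupicRandom(42)
  # Two generators built from the same seed produce identical sequences.
  assert [seeded1.getUInt32() for _ in range(5)] == \
         [seeded2.getUInt32() for _ in range(5)]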
def testSampleNone(self):
  r = Random(42)
  population = numpy.array([1, 2, 3, 4], dtype="uint32")

  # Just make sure there is no exception thrown.
  choices = r.sample(population, 0)

  self.assertEqual(len(choices), 0)
def testSample(self):
  r = Random(42)
  population = numpy.array([1, 2, 3, 4], dtype="uint32")

  choices = r.sample(population, 2)

  self.assertEqual(choices[0], 2)
  self.assertEqual(choices[1], 1)
def __init__(self, columnDimensions=(2048, ), cellsPerColumn=32, activationThreshold=13, initialPermanence=0.21, connectedPermanence=0.50, minThreshold=10, maxNewSynapseCount=20, permanenceIncrement=0.10, permanenceDecrement=0.10, predictedSegmentDecrement=0.0, maxSegmentsPerCell=255, maxSynapsesPerSegment=255, seed=42, **kwargs): # Error checking if not len(columnDimensions): raise ValueError( "Number of column dimensions must be greater than 0") if cellsPerColumn <= 0: raise ValueError( "Number of cells per column must be greater than 0") if minThreshold > activationThreshold: raise ValueError( "The min threshold can't be greater than the activation threshold" ) # TODO: Validate all parameters (and add validation tests) # Save member variables self.columnDimensions = columnDimensions self.cellsPerColumn = cellsPerColumn self.activationThreshold = activationThreshold self.initialPermanence = initialPermanence self.connectedPermanence = connectedPermanence self.minThreshold = minThreshold self.maxNewSynapseCount = maxNewSynapseCount self.permanenceIncrement = permanenceIncrement self.permanenceDecrement = permanenceDecrement self.predictedSegmentDecrement = predictedSegmentDecrement self.maxSegmentsPerCell = maxSegmentsPerCell self.maxSynapsesPerSegment = maxSynapsesPerSegment # Initialize member variables self.connections = self.connectionsFactory(self.numberOfCells()) self._random = Random(seed) self.activeCells = [] self.winnerCells = [] self.activeSegments = [] self.matchingSegments = [] self.numActiveConnectedSynapsesForSegment = [] self.numActivePotentialSynapsesForSegment = [] self.iteration = 0 self.lastUsedIterationForSegment = []
def testSample(self):
  r = Random(42)
  population = numpy.array([1, 2, 3, 4], dtype="uint32")
  choices = numpy.zeros([2], dtype="uint32")

  r.sample(population, choices)

  self.assertEqual(choices[0], 2)
  self.assertEqual(choices[1], 4)
def testSampleNone(self):
  r = Random(42)
  population = numpy.array([1, 2, 3, 4], dtype="uint32")
  choices = numpy.zeros([0], dtype="uint32")

  # Just make sure there is no exception thrown.
  r.sample(population, choices)

  self.assertEqual(choices.size, 0)
def __init__(self, columnDimensions=(2048, ), cellsPerColumn=32, activationThreshold=13, initialPermanence=0.21, connectedPermanence=0.50, minThreshold=10, maxNewSynapseCount=20, permanenceIncrement=0.10, permanenceDecrement=0.10, predictedSegmentDecrement=0.0, seed=42): """ @param columnDimensions (list) Dimensions of the column space @param cellsPerColumn (int) Number of cells per column @param activationThreshold (int) If the number of active connected synapses on a segment is at least this threshold, the segment is said to be active. @param initialPermanence (float) Initial permanence of a new synapse. @param connectedPermanence (float) If the permanence value for a synapse is greater than this value, it is said to be connected. @param minThreshold (int) If the number of synapses active on a segment is at least this threshold, it is selected as the best matching cell in a bursting column. @param maxNewSynapseCount (int) The maximum number of synapses added to a segment during learning. @param permanenceIncrement (float) Amount by which permanences of synapses are incremented during learning. @param permanenceDecrement (float) Amount by which permanences of synapses are decremented during learning. @param predictedSegmentDecrement (float) Amount by which active permanences of synapses of previously predicted but inactive segments are decremented. @param seed (int) Seed for the random number generator. """ # Error checking if not len(columnDimensions): raise ValueError( "Number of column dimensions must be greater than 0") if not cellsPerColumn > 0: raise ValueError( "Number of cells per column must be greater than 0") # TODO: Validate all parameters (and add validation tests) # Save member variables self.columnDimensions = columnDimensions self.cellsPerColumn = cellsPerColumn self.activationThreshold = activationThreshold self.initialPermanence = initialPermanence self.connectedPermanence = connectedPermanence self.minThreshold = minThreshold self.maxNewSynapseCount = maxNewSynapseCount self.permanenceIncrement = permanenceIncrement self.permanenceDecrement = permanenceDecrement self.predictedSegmentDecrement = predictedSegmentDecrement # Initialize member variables self.connections = Connections(self.numberOfCells()) self._random = Random(seed) self.activeCells = set() self.predictiveCells = set() self.activeSegments = set() self.winnerCells = set() self.matchingSegments = set() self.matchingCells = set()
def testSample(self):
  r = Random(42)
  population = cupy.array([1, 2, 3, 4], dtype="uint32")
  choices = cupy.zeros([2], dtype="uint32")

  r.sample(population, choices)

  self.assertEqual(choices[0], 1)
  self.assertEqual(choices[1], 3)
def testShuffle(self):
  r = Random(42)
  arr = numpy.array([1, 2, 3, 4], dtype="uint32")

  r.shuffle(arr)

  self.assertEqual(arr[0], 2)
  self.assertEqual(arr[1], 1)
  self.assertEqual(arr[2], 4)
  self.assertEqual(arr[3], 3)
def __init__(self, dataWidth, randomSeed):
  if dataWidth <= 0:
    raise ValueError("Parameter dataWidth must be > 0")

  # Arbitrary value that's compatible with UInt32 in the proto schema
  # for testing serialization of python-native property
  self._dataWidth = dataWidth

  # For testing serialization of object implemented in the extension
  self._rand = Random(randomSeed)
def testShuffle(self):
  r = Random(42)
  arr = numpy.array([1, 2, 3, 4], dtype="uint32")

  r.shuffle(arr)

  self.assertEqual(arr[0], 3)
  self.assertEqual(arr[1], 4)
  self.assertEqual(arr[2], 2)
  self.assertEqual(arr[3], 1)
def setRandomSeed(self, seed):
  """
  Reset the nupic random generator. This is necessary to reset the random seed
  to generate new sequences.

  @param seed (int) Seed for nupic.bindings.Random.
  """
  self.seed = seed
  self._random = Random()
  self._random.setSeed(seed)
def _orderForCoordinate(cls, coordinate):
  """
  Returns the order for a coordinate.

  @param coordinate (numpy.array) Coordinate

  @return (float) A value in the interval [0, 1), representing the
                  order of the coordinate
  """
  seed = cls._hashCoordinate(coordinate)
  rng = Random(seed)
  return rng.getReal64()
def _bitForCoordinate(cls, coordinate, n):
  """
  Maps the coordinate to a bit in the SDR.

  @param coordinate (numpy.array) Coordinate
  @param n (int) The number of available bits in the SDR

  @return (int) The index to a bit in the SDR
  """
  seed = cls._hashCoordinate(coordinate)
  rng = Random(seed)
  return rng.getUInt32(n)
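# Illustrative sketch (not from the original source): because the RNG is
# re-seeded from the coordinate hash on every call, the same coordinate always
# maps to the same order and the same bit. The md5-based seed below is a
# hypothetical stand-in for the class's _hashCoordinate helper, which is not
# shown in these snippets.
import hashlib

def coordinateDeterminismDemo(coordinate, n=1024):
  # Derive a stable 32-bit seed from the coordinate's bytes (assumed helper).
  seed = int(hashlib.md5(coordinate.tobytes()).hexdigest()[:8], 16)
  first = Random(seed).getUInt32(n)
  second = Random(seed).getUInt32(n)
  assert first == second      # same coordinate -> same bit index
  assert 0 <= first < n       # index falls within the n available bits
  return first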
def __init__(self, patternMachine, seed=42):
  """
  @param patternMachine (PatternMachine) Pattern machine instance
  """
  # Save member variables
  self.patternMachine = patternMachine

  # Initialize member variables
  self._random = Random(seed)
def read(cls, proto): """ Reads deserialized data from proto object @param proto (DynamicStructBuilder) Proto object @return (TemporalMemory) TemporalMemory instance """ tm = object.__new__(cls) tm.columnDimensions = list(proto.columnDimensions) tm.cellsPerColumn = int(proto.cellsPerColumn) tm.activationThreshold = int(proto.activationThreshold) tm.initialPermanence = proto.initialPermanence tm.connectedPermanence = proto.connectedPermanence tm.minThreshold = int(proto.minThreshold) tm.maxNewSynapseCount = int(proto.maxNewSynapseCount) tm.permanenceIncrement = proto.permanenceIncrement tm.permanenceDecrement = proto.permanenceDecrement tm.predictedSegmentDecrement = proto.predictedSegmentDecrement tm.connections = Connections.read(proto.connections) tm._random = Random() tm._random.read(proto.random) tm.activeCells = set([int(x) for x in proto.activeCells]) tm.predictiveCells = set([int(x) for x in proto.predictiveCells]) tm.activeSegments = set([int(x) for x in proto.activeSegments]) tm.winnerCells = set([int(x) for x in proto.winnerCells]) tm.matchingSegments = set([int(x) for x in proto.matchingSegments]) tm.matchingCells = set([int(x) for x in proto.matchingCells]) return tm
def read(cls, proto): """ Reads deserialized data from proto object @param proto (DynamicStructBuilder) Proto object @return (TemporalMemory) TemporalMemory instance """ tm = object.__new__(cls) # capnp fails to save a tuple, so proto.columnDimensions was forced to # serialize as a list. We prefer a tuple, however, because columnDimensions # should be regarded as immutable. tm.columnDimensions = tuple(proto.columnDimensions) tm.cellsPerColumn = int(proto.cellsPerColumn) tm.activationThreshold = int(proto.activationThreshold) tm.initialPermanence = proto.initialPermanence tm.connectedPermanence = proto.connectedPermanence tm.minThreshold = int(proto.minThreshold) tm.maxNewSynapseCount = int(proto.maxNewSynapseCount) tm.permanenceIncrement = proto.permanenceIncrement tm.permanenceDecrement = proto.permanenceDecrement tm.predictedSegmentDecrement = proto.predictedSegmentDecrement tm.connections = Connections.read(proto.connections) #pylint: disable=W0212 tm._random = Random() tm._random.read(proto.random) #pylint: enable=W0212 tm.activeCells = [int(x) for x in proto.activeCells] tm.winnerCells = [int(x) for x in proto.winnerCells] flatListLength = tm.connections.segmentFlatListLength() tm.numActiveConnectedSynapsesForSegment = [0] * flatListLength tm.numActivePotentialSynapsesForSegment = [0] * flatListLength tm.activeSegments = [] tm.matchingSegments = [] for i in xrange(len(proto.activeSegmentOverlaps)): protoSegmentOverlap = proto.activeSegmentOverlaps[i] segment = tm.connections.getSegment(protoSegmentOverlap.cell, protoSegmentOverlap.segment) tm.activeSegments.append(segment) overlap = protoSegmentOverlap.overlap tm.numActiveConnectedSynapsesForSegment[segment.flatIdx] = overlap for i in xrange(len(proto.matchingSegmentOverlaps)): protoSegmentOverlap = proto.matchingSegmentOverlaps[i] segment = tm.connections.getSegment(protoSegmentOverlap.cell, protoSegmentOverlap.segment) tm.matchingSegments.append(segment) overlap = protoSegmentOverlap.overlap tm.numActivePotentialSynapsesForSegment[segment.flatIdx] = overlap return tm
def testSampleWrongDimensionsChoices(self):
  """Check that passing a multi-dimensional array throws a ValueError."""
  r = Random(42)
  population = numpy.array([1, 2, 3, 4], dtype="uint32")
  choices = numpy.zeros([2, 2], dtype="uint32")

  self.assertRaises(ValueError, r.sample, population, choices)
def testSamplePopulationTooSmall(self):
  r = Random(42)
  population = numpy.array([1, 2, 3, 4], dtype="uint32")
  choices = numpy.zeros([5], dtype="uint32")

  self.assertRaises(ValueError, r.sample, population, choices)
def __init__(self, columnDimensions=(2048,), cellsPerColumn=32, activationThreshold=13, learningRadius=2048, initialPermanence=0.21, connectedPermanence=0.50, minThreshold=10, maxNewSynapseCount=20, permanenceIncrement=0.10, permanenceDecrement=0.10, seed=42): """ @param columnDimensions (list) Dimensions of the column space @param cellsPerColumn (int) Number of cells per column @param activationThreshold (int) If the number of active connected synapses on a segment is at least this threshold, the segment is said to be active. @param learningRadius (int) Radius around cell from which it can sample to form distal dendrite connections. @param initialPermanence (float) Initial permanence of a new synapse. @param connectedPermanence (float) If the permanence value for a synapse is greater than this value, it is said to be connected. @param minThreshold (int) If the number of synapses active on a segment is at least this threshold, it is selected as the best matching cell in a bursting column. @param maxNewSynapseCount (int) The maximum number of synapses added to a segment during learning. @param permanenceIncrement (float) Amount by which permanences of synapses are incremented during learning. @param permanenceDecrement (float) Amount by which permanences of synapses are decremented during learning. @param seed (int) Seed for the random number generator. """ # TODO: Validate all parameters (and add validation tests) # Initialize member variables self.connections = Connections(columnDimensions, cellsPerColumn) self._random = Random(seed) self.activeCells = set() self.predictiveCells = set() self.activeSegments = set() self.numActiveSynapsesForSegment = dict() self.winnerCells = set() # Save member variables self.activationThreshold = activationThreshold self.learningRadius = learningRadius self.initialPermanence = initialPermanence self.connectedPermanence = connectedPermanence self.minThreshold = minThreshold self.maxNewSynapseCount = maxNewSynapseCount self.permanenceIncrement = permanenceIncrement self.permanenceDecrement = permanenceDecrement
def __setstate__(self, state):
  self.__dict__.update(state)

  # Initialize self.random as an instance of NupicRandom derived from the
  # previous numpy random state
  randomState = state["random"]
  if isinstance(randomState, numpy.random.mtrand.RandomState):
    self.random = NupicRandom(randomState.randint(sys.maxsize))
def __setstate__(self, state):
  self.__dict__.update(state)

  # Initialize self.random as an instance of NupicRandom derived from the
  # previous numpy random state
  randomState = state["random"]
  if isinstance(randomState, numpy.random.mtrand.RandomState):
    self.random = NupicRandom(randomState.randint(sys.maxint))
def __init__(self, trnCellShape=(32, 32), relayCellShape=(32, 32), inputShape=(32, 32), l6CellCount=1024, trnThreshold=10, relayThreshold=1, seed=42): """ :param trnCellShape: a 2D shape for the TRN :param relayCellShape: a 2D shape for the relay cells :param l6CellCount: number of L6 cells :param trnThreshold: dendritic threshold for TRN cells. This is the min number of active L6 cells on a dendrite for the TRN cell to recognize a pattern on that dendrite. :param relayThreshold: dendritic threshold for relay cells. This is the min number of active TRN cells on a dendrite for the relay cell to recognize a pattern on that dendrite. :param seed: Seed for the random number generator. """ self.trnCellShape = trnCellShape self.trnWidth = trnCellShape[0] self.trnHeight = trnCellShape[1] self.relayCellShape = relayCellShape self.relayWidth = relayCellShape[0] self.relayHeight = relayCellShape[1] self.l6CellCount = l6CellCount self.trnThreshold = trnThreshold self.relayThreshold = relayThreshold self.inputShape = inputShape self.seed = seed self.rng = Random(seed) self.trnActivationThreshold = 5 self.trnConnections = SparseMatrixConnections( trnCellShape[0]*trnCellShape[1], l6CellCount) self.relayConnections = SparseMatrixConnections( relayCellShape[0]*relayCellShape[1], trnCellShape[0]*trnCellShape[1]) # Initialize/reset variables that are updated with calls to compute self.reset() self._initializeTRNToRelayCellConnections()
def testEquals(self):
  r1 = Random(42)
  v1 = r1.getReal64()
  i1 = r1.getUInt32()

  r2 = Random(42)
  v2 = r2.getReal64()
  i2 = r2.getUInt32()

  self.assertEqual(v1, v2)
  self.assertEqual(r1, r2)
  self.assertEqual(i1, i2)
def __init__(self, n, w, num=100, seed=42):
  """
  @param n   (int)      Number of available bits in pattern
  @param w   (int/list) Number of on bits in pattern. If list, each pattern
                        will have a `w` randomly selected from the list.
  @param num (int)      Number of available patterns
  """
  # Save member variables
  self._n = n
  self._w = w
  self._num = num

  # Initialize member variables
  self._random = Random(seed)
  self._patterns = dict()

  self._generate()
def testSampleSequenceRaisesTypeError(self):
  """Check that passing lists throws a TypeError.

  This behavior may change if sample is extended to understand sequences.
  """
  r = Random(42)
  population = [1, 2, 3, 4]

  self.assertRaises(TypeError, r.sample, population, 2)
class PythonDummyRegion(object): """This class represents a serializable object that contains both native python properties as well as properties implemented in an extension. """ def __init__(self, width, seed): # Arbitrary value that's compatible with UInt32 in the proto schema # for testing serialization of python-native property self.width = width # For testing serialization of object implemented in the extension self.rand = Random(seed) def write(self, proto): """ Serialize this instance into PyRegionProto builder. Emulates `PyRegion.write`. NOTE Called from nupic.bindings extension. :param proto: PyRegionProto builder """ regionImpl = proto.regionImpl.as_struct(PythonDummyRegionProto) self.writeToProto(regionImpl) def writeToProto(self, proto): """ Serialize this instance into PythonDummyRegionProto builder :param proto: PythonDummyRegionProto builder """ proto.width = self.width proto.random = self.rand.writeOut() @classmethod def read(cls, proto): """Deserialize from the given PyRegionProto reader. Emulates `PyRegion.read`. NOTE Called from nupic.bindings extension. :param proto: PyRegionProto reader :returns: Instance of PythonDummyRegion initialized from proto """ regionImpl = proto.regionImpl.as_struct(PythonDummyRegionProto) return cls.readFromProto(regionImpl) @classmethod def readFromProto(cls, proto): """Deserialize from the given PythonDummyRegionProto reader :param proto: PythonDummyRegionProto reader :returns: Instance of PythonDummyRegion initialized from proto """ obj = object.__new__(cls) obj.width = proto.width obj.rand = Random.readIn(proto.random) return obj
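# Illustrative sketch (not from the original source): a possible serialization
# round trip for PythonDummyRegion using the writeToProto/readFromProto methods
# defined above. It assumes PythonDummyRegionProto is a pycapnp schema that
# exposes new_message(); that builder API is an assumption, not shown in the
# snippets.
def pythonDummyRegionRoundTripDemo():
  original = PythonDummyRegion(width=17, seed=42)

  proto = PythonDummyRegionProto.new_message()   # assumed pycapnp builder API
  original.writeToProto(proto)

  restored = PythonDummyRegion.readFromProto(proto)
  assert restored.width == original.width
  # Both generators should resume from the same serialized state.
  assert restored.rand.getUInt32() == original.rand.getUInt32()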
def __init__(self, columnDimensions=(2048,), cellsPerColumn=32, activationThreshold=13, initialPermanence=0.21, connectedPermanence=0.50, minThreshold=10, maxNewSynapseCount=20, permanenceIncrement=0.10, permanenceDecrement=0.10, predictedSegmentDecrement = 0.004, seed=42): """ @param columnDimensions (list) Dimensions of the column space @param cellsPerColumn (int) Number of cells per column @param activationThreshold (int) If the number of active connected synapses on a segment is at least this threshold, the segment is said to be active. @param initialPermanence (float) Initial permanence of a new synapse. @param connectedPermanence (float) If the permanence value for a synapse is greater than this value, it is said to be connected. @param minThreshold (int) If the number of synapses active on a segment is at least this threshold, it is selected as the best matching cell in a bursting column. @param maxNewSynapseCount (int) The maximum number of synapses added to a segment during learning. @param permanenceIncrement (float) Amount by which permanences of synapses are incremented during learning. @param permanenceDecrement (float) Amount by which permanences of synapses are decremented during learning. @param predictedSegmentDecrement (float) Amount by which active permanences of synapses of previously predicted but inactive segments are decremented. @param seed (int) Seed for the random number generator. """ # Error checking if not len(columnDimensions): raise ValueError("Number of column dimensions must be greater than 0") if not cellsPerColumn > 0: raise ValueError("Number of cells per column must be greater than 0") # TODO: Validate all parameters (and add validation tests) # Save member variables self.columnDimensions = columnDimensions self.cellsPerColumn = cellsPerColumn self.activationThreshold = activationThreshold self.initialPermanence = initialPermanence self.connectedPermanence = connectedPermanence self.minThreshold = minThreshold self.maxNewSynapseCount = maxNewSynapseCount self.permanenceIncrement = permanenceIncrement self.permanenceDecrement = permanenceDecrement self.predictedSegmentDecrement = predictedSegmentDecrement # Initialize member variables self.connections = Connections(self.numberOfCells()) self._random = Random(seed) self.activeCells = set() self.predictiveCells = set() self.activeSegments = set() self.winnerCells = set() self.matchingSegments = set() self.matchingCells = set()
def __init__(self, columnDimensions=(2048,), cellsPerColumn=32, activationThreshold=13, initialPermanence=0.21, connectedPermanence=0.50, minThreshold=10, maxNewSynapseCount=20, permanenceIncrement=0.10, permanenceDecrement=0.10, predictedSegmentDecrement=0.0, maxSegmentsPerCell=255, maxSynapsesPerSegment=255, seed=42, **kwargs): # Error checking if not len(columnDimensions): raise ValueError("Number of column dimensions must be greater than 0") if cellsPerColumn <= 0: raise ValueError("Number of cells per column must be greater than 0") if minThreshold > activationThreshold: raise ValueError( "The min threshold can't be greater than the activation threshold") # TODO: Validate all parameters (and add validation tests) # Save member variables self.columnDimensions = columnDimensions self.cellsPerColumn = cellsPerColumn self.activationThreshold = activationThreshold self.initialPermanence = initialPermanence self.connectedPermanence = connectedPermanence self.minThreshold = minThreshold self.maxNewSynapseCount = maxNewSynapseCount self.permanenceIncrement = permanenceIncrement self.permanenceDecrement = permanenceDecrement self.predictedSegmentDecrement = predictedSegmentDecrement self.maxSegmentsPerCell = maxSegmentsPerCell self.maxSynapsesPerSegment = maxSynapsesPerSegment # Initialize member variables self.connections = self.connectionsFactory(self.numberOfCells()) self._random = Random(seed) self.activeCells = [] self.winnerCells = [] self.activeSegments = [] self.matchingSegments = [] self.numActiveConnectedSynapsesForSegment = [] self.numActivePotentialSynapsesForSegment = [] self.iteration = 0 self.lastUsedIterationForSegment = []
def readFromProto(cls, proto):
  """Deserialize from the given PythonDummyRegionProto reader

  :param proto: PythonDummyRegionProto reader

  :returns: Instance of PythonDummyRegion initialized from proto
  """
  obj = object.__new__(cls)
  obj.width = proto.width
  obj.rand = Random.readIn(proto.random)
  return obj
def testNupicRandomPickling(self):
  """Test pickling / unpickling of NuPIC randomness."""

  # Simple test: make sure that dumping / loading works...
  r = Random(42)
  pickledR = pickle.dumps(r)

  test1 = [r.getUInt32() for _ in range(10)]
  r = pickle.loads(pickledR)
  test2 = [r.getUInt32() for _ in range(10)]

  self.assertEqual(test1, test2,
                   "Simple NuPIC random pickle/unpickle failed.")

  # A little trickier: dump / load _after_ some numbers have been generated
  # (in the first test). Things should still work...
  # ...the idea of this test is to make sure that the pickle code isn't just
  # saving the initial seed...
  pickledR = pickle.dumps(r)

  test3 = [r.getUInt32() for _ in range(10)]
  r = pickle.loads(pickledR)
  test4 = [r.getUInt32() for _ in range(10)]

  self.assertEqual(
      test3, test4,
      "NuPIC random pickle/unpickle didn't work for saving later state.")

  self.assertNotEqual(test1, test3,
                      "NuPIC random gave the same result twice?!?")
def main():
  """Measure serialization performance of Random"""
  r = Random(42)

  # Measure serialization
  startSerializationTime = time.time()
  for i in range(_SERIALIZATION_LOOPS):
    r.saveToFile("RandomSerialization.stream")
  elapsedSerializationTime = time.time() - startSerializationTime

  # Measure deserialization
  startDeserializationTime = time.time()
  for _ in range(_DESERIALIZATION_LOOPS):
    r.loadFromFile("RandomSerialization.stream")
  elapsedDeserializationTime = time.time() - startDeserializationTime

  # Print report
  print(_SERIALIZATION_LOOPS, "Serialization loops in",
        elapsedSerializationTime, "seconds.")
  print("\t", elapsedSerializationTime / _SERIALIZATION_LOOPS, "seconds per loop.")

  print(_DESERIALIZATION_LOOPS, "Deserialization loops in",
        elapsedDeserializationTime, "seconds.")
  print("\t", elapsedDeserializationTime / _DESERIALIZATION_LOOPS, "seconds per loop.")
def __init__(self, columnDimensions=(2048,), basalInputDimensions=(), apicalInputDimensions=(), cellsPerColumn=32, activationThreshold=13, initialPermanence=0.21, connectedPermanence=0.50, minThreshold=10, sampleSize=20, permanenceIncrement=0.1, permanenceDecrement=0.1, predictedSegmentDecrement=0.0, maxNewSynapseCount=None, maxSynapsesPerSegment=-1, maxSegmentsPerCell=None, seed=42): self.columnDimensions = columnDimensions self.numColumns = self._numPoints(columnDimensions) self.basalInputDimensions = basalInputDimensions self.apicalInputDimensions = apicalInputDimensions self.cellsPerColumn = cellsPerColumn self.initialPermanence = initialPermanence self.connectedPermanence = connectedPermanence self.minThreshold = minThreshold self.sampleSize = sampleSize if maxNewSynapseCount is not None: print "Parameter 'maxNewSynapseCount' is deprecated. Use 'sampleSize'." self.sampleSize = maxNewSynapseCount if maxSegmentsPerCell is not None: print "Warning: ignoring parameter 'maxSegmentsPerCell'" self.permanenceIncrement = permanenceIncrement self.permanenceDecrement = permanenceDecrement self.predictedSegmentDecrement = predictedSegmentDecrement self.activationThreshold = activationThreshold self.maxSynapsesPerSegment = maxSynapsesPerSegment self.basalConnections = SparseMatrixConnections( self.numColumns*cellsPerColumn, self._numPoints(basalInputDimensions)) self.apicalConnections = SparseMatrixConnections( self.numColumns*cellsPerColumn, self._numPoints(apicalInputDimensions)) self.rng = Random(seed) self.activeCells = EMPTY_UINT_ARRAY self.winnerCells = EMPTY_UINT_ARRAY self.prevPredictedCells = EMPTY_UINT_ARRAY
def __init__(self, columnDimensions=(2048,), cellsPerColumn=32, activationThreshold=13, learningRadius=2048, initialPermanence=0.21, connectedPermanence=0.50, minThreshold=10, maxNewSynapseCount=20, permanenceIncrement=0.10, permanenceDecrement=0.10, seed=42): """ @param columnDimensions (list) Dimensions of the column space @param cellsPerColumn (int) Number of cells per column @param activationThreshold (int) If the number of active connected synapses on a segment is at least this threshold, the segment is said to be active. @param learningRadius (int) Radius around cell from which it can sample to form distal dendrite connections. @param initialPermanence (float) Initial permanence of a new synapse. @param connectedPermanence (float) If the permanence value for a synapse is greater than this value, it is said to be connected. @param minThreshold (int) If the number of synapses active on a segment is at least this threshold, it is selected as the best matching cell in a bursting column. @param maxNewSynapseCount (int) The maximum number of synapses added to a segment during learning. @param permanenceIncrement (float) Amount by which permanences of synapses are incremented during learning. @param permanenceDecrement (float) Amount by which permanences of synapses are decremented during learning. @param seed (int) Seed for the random number generator. """ # TODO: Validate all parameters (and add validation tests) # Initialize member variables self.connections = Connections(columnDimensions, cellsPerColumn) self._random = Random(seed) self.activeCells = set() self.predictiveCells = set() self.activeSegments = set() self.winnerCells = set() # Save member variables self.activationThreshold = activationThreshold self.learningRadius = learningRadius self.initialPermanence = initialPermanence self.connectedPermanence = connectedPermanence self.minThreshold = minThreshold self.maxNewSynapseCount = maxNewSynapseCount self.permanenceIncrement = permanenceIncrement self.permanenceDecrement = permanenceDecrement
class TemporalMemory(object): """ Class implementing the Temporal Memory algorithm. """ def __init__(self, columnDimensions=(2048,), cellsPerColumn=32, activationThreshold=13, learningRadius=2048, initialPermanence=0.21, connectedPermanence=0.50, minThreshold=10, maxNewSynapseCount=20, permanenceIncrement=0.10, permanenceDecrement=0.10, seed=42): """ @param columnDimensions (list) Dimensions of the column space @param cellsPerColumn (int) Number of cells per column @param activationThreshold (int) If the number of active connected synapses on a segment is at least this threshold, the segment is said to be active. @param learningRadius (int) Radius around cell from which it can sample to form distal dendrite connections. @param initialPermanence (float) Initial permanence of a new synapse. @param connectedPermanence (float) If the permanence value for a synapse is greater than this value, it is said to be connected. @param minThreshold (int) If the number of synapses active on a segment is at least this threshold, it is selected as the best matching cell in a bursting column. @param maxNewSynapseCount (int) The maximum number of synapses added to a segment during learning. @param permanenceIncrement (float) Amount by which permanences of synapses are incremented during learning. @param permanenceDecrement (float) Amount by which permanences of synapses are decremented during learning. @param seed (int) Seed for the random number generator. """ # Error checking if not len(columnDimensions): raise ValueError("Number of column dimensions must be greater than 0") if not cellsPerColumn > 0: raise ValueError("Number of cells per column must be greater than 0") # TODO: Validate all parameters (and add validation tests) # Save member variables self.columnDimensions = columnDimensions self.cellsPerColumn = cellsPerColumn self.activationThreshold = activationThreshold self.learningRadius = learningRadius self.initialPermanence = initialPermanence self.connectedPermanence = connectedPermanence self.minThreshold = minThreshold self.maxNewSynapseCount = maxNewSynapseCount self.permanenceIncrement = permanenceIncrement self.permanenceDecrement = permanenceDecrement # Initialize member variables self.connections = Connections(self.numberOfCells()) self._random = Random(seed) self.activeCells = set() self.predictiveCells = set() self.activeSegments = set() self.winnerCells = set() # ============================== # Main functions # ============================== def compute(self, activeColumns, learn=True): """ Feeds input record through TM, performing inference and learning. Updates member variables with new state. @param activeColumns (set) Indices of active columns in `t` """ (activeCells, winnerCells, activeSegments, predictiveCells, predictedColumns) = self.computeFn(activeColumns, self.predictiveCells, self.activeSegments, self.activeCells, self.winnerCells, self.connections, learn=learn) self.activeCells = activeCells self.winnerCells = winnerCells self.activeSegments = activeSegments self.predictiveCells = predictiveCells def computeFn(self, activeColumns, prevPredictiveCells, prevActiveSegments, prevActiveCells, prevWinnerCells, connections, learn=True): """ 'Functional' version of compute. Returns new state. 
@param activeColumns (set) Indices of active columns in `t` @param prevPredictiveCells (set) Indices of predictive cells in `t-1` @param prevActiveSegments (set) Indices of active segments in `t-1` @param prevActiveCells (set) Indices of active cells in `t-1` @param prevWinnerCells (set) Indices of winner cells in `t-1` @param connections (Connections) Connectivity of layer @param learn (bool) Whether or not learning is enabled @return (tuple) Contains: `activeCells` (set), `winnerCells` (set), `activeSegments` (set), `predictiveCells` (set) """ activeCells = set() winnerCells = set() (_activeCells, _winnerCells, predictedColumns) = self.activateCorrectlyPredictiveCells( prevPredictiveCells, activeColumns) activeCells.update(_activeCells) winnerCells.update(_winnerCells) (_activeCells, _winnerCells, learningSegments) = self.burstColumns(activeColumns, predictedColumns, prevActiveCells, prevWinnerCells, connections) activeCells.update(_activeCells) winnerCells.update(_winnerCells) if learn: self.learnOnSegments(prevActiveSegments, learningSegments, prevActiveCells, winnerCells, prevWinnerCells, connections) (activeSegments, predictiveCells) = self.computePredictiveCells(activeCells, connections) return (activeCells, winnerCells, activeSegments, predictiveCells, predictedColumns) def reset(self): """ Indicates the start of a new sequence. Resets sequence state of the TM. """ self.activeCells = set() self.predictiveCells = set() self.activeSegments = set() self.winnerCells = set() # ============================== # Phases # ============================== def activateCorrectlyPredictiveCells(self, prevPredictiveCells, activeColumns): """ Phase 1: Activate the correctly predictive cells. Pseudocode: - for each prev predictive cell - if in active column - mark it as active - mark it as winner cell - mark column as predicted @param prevPredictiveCells (set) Indices of predictive cells in `t-1` @param activeColumns (set) Indices of active columns in `t` @return (tuple) Contains: `activeCells` (set), `winnerCells` (set), `predictedColumns` (set) """ activeCells = set() winnerCells = set() predictedColumns = set() for cell in prevPredictiveCells: column = self.columnForCell(cell) if column in activeColumns: activeCells.add(cell) winnerCells.add(cell) predictedColumns.add(column) return activeCells, winnerCells, predictedColumns def burstColumns(self, activeColumns, predictedColumns, prevActiveCells, prevWinnerCells, connections): """ Phase 2: Burst unpredicted columns. 
Pseudocode: - for each unpredicted active column - mark all cells as active - mark the best matching cell as winner cell - (learning) - if it has no matching segment - (optimization) if there are prev winner cells - add a segment to it - mark the segment as learning @param activeColumns (set) Indices of active columns in `t` @param predictedColumns (set) Indices of predicted columns in `t` @param prevActiveCells (set) Indices of active cells in `t-1` @param prevWinnerCells (set) Indices of winner cells in `t-1` @param connections (Connections) Connectivity of layer @return (tuple) Contains: `activeCells` (set), `winnerCells` (set), `learningSegments` (set) """ activeCells = set() winnerCells = set() learningSegments = set() unpredictedColumns = activeColumns - predictedColumns for column in unpredictedColumns: cells = self.cellsForColumn(column) activeCells.update(cells) (bestCell, bestSegment) = self.bestMatchingCell(cells, prevActiveCells, connections) winnerCells.add(bestCell) if bestSegment is None and len(prevWinnerCells): bestSegment = connections.createSegment(bestCell) if bestSegment is not None: learningSegments.add(bestSegment) return activeCells, winnerCells, learningSegments def learnOnSegments(self, prevActiveSegments, learningSegments, prevActiveCells, winnerCells, prevWinnerCells, connections): """ Phase 3: Perform learning by adapting segments. Pseudocode: - (learning) for each prev active or learning segment - if learning segment or from winner cell - strengthen active synapses - weaken inactive synapses - if learning segment - add some synapses to the segment - subsample from prev winner cells @param prevActiveSegments (set) Indices of active segments in `t-1` @param learningSegments (set) Indices of learning segments in `t` @param prevActiveCells (set) Indices of active cells in `t-1` @param winnerCells (set) Indices of winner cells in `t` @param prevWinnerCells (set) Indices of winner cells in `t-1` @param connections (Connections) Connectivity of layer """ for segment in prevActiveSegments | learningSegments: isLearningSegment = segment in learningSegments isFromWinnerCell = connections.cellForSegment(segment) in winnerCells activeSynapses = self.activeSynapsesForSegment( segment, prevActiveCells, connections) if isLearningSegment or isFromWinnerCell: self.adaptSegment(segment, activeSynapses, connections) if isLearningSegment: n = self.maxNewSynapseCount - len(activeSynapses) for presynapticCell in self.pickCellsToLearnOn(n, segment, prevWinnerCells, connections): connections.createSynapse(segment, presynapticCell, self.initialPermanence) def computePredictiveCells(self, activeCells, connections): """ Phase 4: Compute predictive cells due to lateral input on distal dendrites. Pseudocode: - for each distal dendrite segment with activity >= activationThreshold - mark the segment as active - mark the cell as predictive Forward propagates activity from active cells to the synapses that touch them, to determine which synapses are active. 
@param activeCells (set) Indices of active cells in `t` @param connections (Connections) Connectivity of layer @return (tuple) Contains: `activeSegments` (set), `predictiveCells` (set) """ numActiveConnectedSynapsesForSegment = defaultdict(lambda: 0) activeSegments = set() predictiveCells = set() for cell in activeCells: for synapseData in connections.synapsesForPresynapticCell(cell).values(): segment = synapseData.segment permanence = synapseData.permanence if permanence >= self.connectedPermanence: numActiveConnectedSynapsesForSegment[segment] += 1 if (numActiveConnectedSynapsesForSegment[segment] >= self.activationThreshold): activeSegments.add(segment) predictiveCells.add(connections.cellForSegment(segment)) return activeSegments, predictiveCells # ============================== # Helper functions # ============================== def bestMatchingCell(self, cells, activeCells, connections): """ Gets the cell with the best matching segment (see `TM.bestMatchingSegment`) that has the largest number of active synapses of all best matching segments. If none were found, pick the least used cell (see `TM.leastUsedCell`). @param cells (set) Indices of cells @param activeCells (set) Indices of active cells @param connections (Connections) Connectivity of layer @return (tuple) Contains: `cell` (int), `bestSegment` (int) """ maxSynapses = 0 bestCell = None bestSegment = None for cell in cells: segment, numActiveSynapses = self.bestMatchingSegment( cell, activeCells, connections) if segment is not None and numActiveSynapses > maxSynapses: maxSynapses = numActiveSynapses bestCell = cell bestSegment = segment if bestCell is None: bestCell = self.leastUsedCell(cells, connections) return bestCell, bestSegment def bestMatchingSegment(self, cell, activeCells, connections): """ Gets the segment on a cell with the largest number of activate synapses, including all synapses with non-zero permanences. @param cell (int) Cell index @param activeCells (set) Indices of active cells @param connections (Connections) Connectivity of layer @return (tuple) Contains: `segment` (int), `connectedActiveSynapses` (set) """ maxSynapses = self.minThreshold bestSegment = None bestNumActiveSynapses = None for segment in connections.segmentsForCell(cell): numActiveSynapses = 0 for synapse in connections.synapsesForSegment(segment): synapseData = connections.dataForSynapse(synapse) if synapseData.presynapticCell in activeCells: numActiveSynapses += 1 if numActiveSynapses >= maxSynapses: maxSynapses = numActiveSynapses bestSegment = segment bestNumActiveSynapses = numActiveSynapses return bestSegment, bestNumActiveSynapses def leastUsedCell(self, cells, connections): """ Gets the cell with the smallest number of segments. Break ties randomly. @param cells (set) Indices of cells @param connections (Connections) Connectivity of layer @return (int) Cell index """ leastUsedCells = set() minNumSegments = float("inf") for cell in cells: numSegments = len(connections.segmentsForCell(cell)) if numSegments < minNumSegments: minNumSegments = numSegments leastUsedCells = set() if numSegments == minNumSegments: leastUsedCells.add(cell) i = self._random.getUInt32(len(leastUsedCells)) return sorted(leastUsedCells)[i] @staticmethod def activeSynapsesForSegment(segment, activeCells, connections): """ Returns the synapses on a segment that are active due to lateral input from active cells. 
@param segment (int) Segment index @param activeCells (set) Indices of active cells @param connections (Connections) Connectivity of layer @return (set) Indices of active synapses on segment """ synapses = set() for synapse in connections.synapsesForSegment(segment): synapseData = connections.dataForSynapse(synapse) if synapseData.presynapticCell in activeCells: synapses.add(synapse) return synapses def adaptSegment(self, segment, activeSynapses, connections): """ Updates synapses on segment. Strengthens active synapses; weakens inactive synapses. @param segment (int) Segment index @param activeSynapses (set) Indices of active synapses @param connections (Connections) Connectivity of layer """ for synapse in connections.synapsesForSegment(segment): synapseData = connections.dataForSynapse(synapse) permanence = synapseData.permanence if synapse in activeSynapses: permanence += self.permanenceIncrement else: permanence -= self.permanenceDecrement # Keep permanence within min/max bounds permanence = max(0.0, min(1.0, permanence)) connections.updateSynapsePermanence(synapse, permanence) def pickCellsToLearnOn(self, n, segment, winnerCells, connections): """ Pick cells to form distal connections to. TODO: Respect topology and learningRadius @param n (int) Number of cells to pick @param segment (int) Segment index @param winnerCells (set) Indices of winner cells in `t` @param connections (Connections) Connectivity of layer @return (set) Indices of cells picked """ candidates = set(winnerCells) # Remove cells that are already synapsed on by this segment for synapse in connections.synapsesForSegment(segment): synapseData = connections.dataForSynapse(synapse) presynapticCell = synapseData.presynapticCell if presynapticCell in candidates: candidates.remove(presynapticCell) n = min(n, len(candidates)) candidates = sorted(candidates) cells = set() # Pick n cells randomly for _ in range(n): i = self._random.getUInt32(len(candidates)) cells.add(candidates[i]) del candidates[i] return cells def columnForCell(self, cell): """ Returns the index of the column that a cell belongs to. @param cell (int) Cell index @return (int) Column index """ self._validateCell(cell) return int(cell / self.cellsPerColumn) def cellsForColumn(self, column): """ Returns the indices of cells that belong to a column. @param column (int) Column index @return (set) Cell indices """ self._validateColumn(column) start = self.cellsPerColumn * column end = start + self.cellsPerColumn return set([cell for cell in range(start, end)]) def numberOfColumns(self): """ Returns the number of columns in this layer. @return (int) Number of columns """ return reduce(mul, self.columnDimensions, 1) def numberOfCells(self): """ Returns the number of cells in this layer. @return (int) Number of cells """ return self.numberOfColumns() * self.cellsPerColumn def mapCellsToColumns(self, cells): """ Maps cells to the columns they belong to @param cells (set) Cells @return (dict) Mapping from columns to their cells in `cells` """ cellsForColumns = defaultdict(set) for cell in cells: column = self.columnForCell(cell) cellsForColumns[column].add(cell) return cellsForColumns def _validateColumn(self, column): """ Raises an error if column index is invalid. @param column (int) Column index """ if column >= self.numberOfColumns() or column < 0: raise IndexError("Invalid column") def _validateCell(self, cell): """ Raises an error if cell index is invalid. 
@param cell (int) Cell index """ if cell >= self.numberOfCells() or cell < 0: raise IndexError("Invalid cell") @classmethod def getCellIndices(cls, cells): return [cls.getCellIndex(c) for c in cells] @staticmethod def getCellIndex(cell): return cell.idx
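# Illustrative sketch (not from the original source): a minimal use of the
# TemporalMemory class defined above. Column indices are plain ints passed in
# as a set; the concrete predictions depend on the parameters and on the
# input sequence, so no particular output is asserted here.
def temporalMemoryUsageDemo():
  tm = TemporalMemory(columnDimensions=(2048,), cellsPerColumn=32, seed=42)

  sequence = [{0, 1, 2, 3}, {4, 5, 6, 7}, {0, 1, 2, 3}]
  predictedColumns = set()
  for activeColumns in sequence:
    tm.compute(activeColumns, learn=True)
    # Map the currently predictive cells back to their columns.
    predictedColumns = set(tm.mapCellsToColumns(tm.predictiveCells).keys())

  tm.reset()  # clear sequence state before presenting an unrelated sequence
  return predictedColumns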
class KNNClassifierRegion(PyRegion): """ KNNClassifierRegion implements the k Nearest Neighbor classification algorithm. By default it will implement vanilla 1-nearest neighbor using the L2 (Euclidean) distance norm. There are options for using different norms as well as various ways of sparsifying the input. Note: categories are ints >= 0. """ __VERSION__ = 1 @classmethod def getSpec(cls): ns = dict( description=KNNClassifierRegion.__doc__, singleNodeOnly=True, inputs=dict( categoryIn=dict( description='Vector of categories of the input sample', dataType='Real32', count=0, required=True, regionLevel=True, isDefaultInput=False, requireSplitterMap=False), bottomUpIn=dict( description='Belief values over children\'s groups', dataType='Real32', count=0, required=True, regionLevel=False, isDefaultInput=True, requireSplitterMap=False), partitionIn=dict( description='Partition ID of the input sample', dataType='Real32', count=0, required=True, regionLevel=True, isDefaultInput=False, requireSplitterMap=False), auxDataIn=dict( description='Auxiliary data from the sensor', dataType='Real32', count=0, required=False, regionLevel=True, isDefaultInput=False, requireSplitterMap=False) ), outputs=dict( categoriesOut=dict( description='A vector representing, for each category ' 'index, the likelihood that the input to the node belongs ' 'to that category based on the number of neighbors of ' 'that category that are among the nearest K.', dataType='Real32', count=0, regionLevel=True, isDefaultOutput=True), bestPrototypeIndices=dict( description='A vector that lists, in descending order of ' 'the match, the positions of the prototypes ' 'that best match the input pattern.', dataType='Real32', count=0, regionLevel=True, isDefaultOutput=False), categoryProbabilitiesOut=dict( description='A vector representing, for each category ' 'index, the probability that the input to the node belongs ' 'to that category based on the distance to the nearest ' 'neighbor of each category.', dataType='Real32', count=0, regionLevel=True, isDefaultOutput=True), ), parameters=dict( learningMode=dict( description='Boolean (0/1) indicating whether or not a region ' 'is in learning mode.', dataType='UInt32', count=1, constraints='bool', defaultValue=1, accessMode='ReadWrite'), inferenceMode=dict( description='Boolean (0/1) indicating whether or not a region ' 'is in inference mode.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='ReadWrite'), acceptanceProbability=dict( description='During learning, inputs are learned with ' 'probability equal to this parameter. ' 'If set to 1.0, the default, ' 'all inputs will be considered ' '(subject to other tests).', dataType='Real32', count=1, constraints='', defaultValue=1.0, #accessMode='Create'), accessMode='ReadWrite'), # and Create too confusion=dict( description='Confusion matrix accumulated during inference. ' 'Reset with reset(). 
This is available to Python ' 'client code only.', dataType='Handle', count=2, constraints='', defaultValue=None, accessMode='Read'), activeOutputCount=dict( description='The number of active elements in the ' '"categoriesOut" output.', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Read'), categoryCount=dict( description='An integer indicating the number of ' 'categories that have been learned', dataType='UInt32', count=1, constraints='', defaultValue=None, accessMode='Read'), patternCount=dict( description='Number of patterns learned by the classifier.', dataType='UInt32', count=1, constraints='', defaultValue=None, accessMode='Read'), patternMatrix=dict( description='The actual patterns learned by the classifier, ' 'returned as a matrix.', dataType='Handle', count=1, constraints='', defaultValue=None, accessMode='Read'), k=dict( description='The number of nearest neighbors to use ' 'during inference.', dataType='UInt32', count=1, constraints='', defaultValue=1, accessMode='Create'), maxCategoryCount=dict( description='The maximal number of categories the ' 'classifier will distinguish between.', dataType='UInt32', count=1, constraints='', defaultValue=2, accessMode='Create'), distanceNorm=dict( description='The norm to use for a distance metric (i.e., ' 'the "p" in Lp-norm)', dataType='Real32', count=1, constraints='', defaultValue=2.0, accessMode='ReadWrite'), #accessMode='Create'), distanceMethod=dict( description='Method used to compute distances between inputs and' 'prototypes. Possible options are norm, rawOverlap, ' 'pctOverlapOfLarger, and pctOverlapOfProto', dataType="Byte", count=0, constraints='enum: norm, rawOverlap, pctOverlapOfLarger, ' 'pctOverlapOfProto, pctOverlapOfInput', defaultValue='norm', accessMode='ReadWrite'), outputProbabilitiesByDist=dict( description='If True, categoryProbabilitiesOut is the probability of ' 'each category based on the distance to the nearest neighbor of ' 'each category. If False, categoryProbabilitiesOut is the ' 'percentage of neighbors among the top K that are of each category.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), distThreshold=dict( description='Distance Threshold. 
If a pattern that ' 'is less than distThreshold apart from ' 'the input pattern already exists in the ' 'KNN memory, then the input pattern is ' 'not added to KNN memory.', dataType='Real32', count=1, constraints='', defaultValue=0.0, accessMode='ReadWrite'), inputThresh=dict( description='Input binarization threshold, used if ' '"doBinarization" is True.', dataType='Real32', count=1, constraints='', defaultValue=0.5, accessMode='Create'), doBinarization=dict( description='Whether or not to binarize the input vectors.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), useSparseMemory=dict( description='A boolean flag that determines whether or ' 'not the KNNClassifier will use sparse Memory', dataType='UInt32', count=1, constraints='', defaultValue=1, accessMode='Create'), sparseThreshold=dict( description='If sparse memory is used, input variables ' 'whose absolute value is less than this ' 'threshold will be stored as zero', dataType='Real32', count=1, constraints='', defaultValue=0.0, accessMode='Create'), relativeThreshold=dict( description='Whether to multiply sparseThreshold by max value ' ' in input', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), winnerCount=dict( description='Only this many elements of the input are ' 'stored. All elements are stored if 0.', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Create'), doSphering=dict( description='A boolean indicating whether or not data should' 'be "sphered" (i.e. each dimension should be normalized such' 'that its mean and variance are zero and one, respectively.) This' ' sphering normalization would be performed after all training ' 'samples had been received but before inference was performed. ' 'The dimension-specific normalization constants would then ' ' be applied to all future incoming vectors prior to performing ' ' conventional NN inference.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), SVDSampleCount=dict( description='If not 0, carries out SVD transformation after ' 'that many samples have been seen.', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Create'), SVDDimCount=dict( description='Number of dimensions to keep after SVD if greater ' 'than 0. If set to -1 it is considered unspecified. ' 'If set to 0 it is consider "adaptive" and the number ' 'is chosen automatically.', dataType='Int32', count=1, constraints='', defaultValue=-1, accessMode='Create'), fractionOfMax=dict( description='The smallest singular value which is retained ' 'as a fraction of the largest singular value. 
This is ' 'used only if SVDDimCount==0 ("adaptive").', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Create'), useAuxiliary=dict( description='Whether or not the classifier should use auxiliary ' 'input data.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), justUseAuxiliary=dict( description='Whether or not the classifier should ONLUY use the ' 'auxiliary input data.', dataType='UInt32', count=1, constraints='bool', defaultValue=0, accessMode='Create'), clVerbosity=dict( description='An integer that controls the verbosity level, ' '0 means no verbose output, increasing integers ' 'provide more verbosity.', dataType='UInt32', count=1, constraints='', defaultValue=0 , accessMode='ReadWrite'), doSelfValidation=dict( description='A boolean flag that determines whether or' 'not the KNNClassifier should perform partitionID-based' 'self-validation during the finishLearning() step.', dataType='UInt32', count=1, constraints='bool', defaultValue=None, accessMode='ReadWrite'), keepAllDistances=dict( description='Whether to store all the protoScores in an array, ' 'rather than just the ones for the last inference. ' 'When this parameter is changed from True to False, ' 'all the scores are discarded except for the most ' 'recent one.', dataType='UInt32', count=1, constraints='bool', defaultValue=None, accessMode='ReadWrite'), replaceDuplicates=dict( description='A boolean flag that determines whether or' 'not the KNNClassifier should replace duplicates' 'during learning. This should be on when online' 'learning.', dataType='UInt32', count=1, constraints='bool', defaultValue=None, accessMode='ReadWrite'), cellsPerCol=dict( description='If >= 1, we assume the input is organized into columns, ' 'in the same manner as the temporal pooler AND ' 'whenever we store a new prototype, we only store the ' 'start cell (first cell) in any column which is bursting.' 'colum ', dataType='UInt32', count=1, constraints='', defaultValue=0, accessMode='Create'), maxStoredPatterns=dict( description='Limits the maximum number of the training patterns ' 'stored. When KNN learns in a fixed capacity mode, ' 'the unused patterns are deleted once the number ' 'of stored patterns is greater than maxStoredPatterns' 'columns. 
[-1 is no limit] ', dataType='Int32', count=1, constraints='', defaultValue=-1, accessMode='Create'), ), commands=dict() ) return ns def __init__(self, maxCategoryCount=0, bestPrototypeIndexCount=0, outputProbabilitiesByDist=False, k=1, distanceNorm=2.0, distanceMethod='norm', distThreshold=0.0, doBinarization=False, inputThresh=0.500, useSparseMemory=True, sparseThreshold=0.0, relativeThreshold=False, winnerCount=0, acceptanceProbability=1.0, seed=42, doSphering=False, SVDSampleCount=0, SVDDimCount=0, fractionOfMax=0, useAuxiliary=0, justUseAuxiliary=0, clVerbosity=0, doSelfValidation=False, replaceDuplicates=False, cellsPerCol=0, maxStoredPatterns=-1 ): self.version = KNNClassifierRegion.__VERSION__ # Convert various arguments to match the expectation # of the KNNClassifier if SVDSampleCount == 0: SVDSampleCount = None if SVDDimCount == -1: SVDDimCount = None elif SVDDimCount == 0: SVDDimCount = 'adaptive' if fractionOfMax == 0: fractionOfMax = None if useAuxiliary == 0: useAuxiliary = False if justUseAuxiliary == 0: justUseAuxiliary = False # KNN Parameters self.knnParams = dict( k=k, distanceNorm=distanceNorm, distanceMethod=distanceMethod, distThreshold=distThreshold, doBinarization=doBinarization, binarizationThreshold=inputThresh, useSparseMemory=useSparseMemory, sparseThreshold=sparseThreshold, relativeThreshold=relativeThreshold, numWinners=winnerCount, numSVDSamples=SVDSampleCount, numSVDDims=SVDDimCount, fractionOfMax=fractionOfMax, verbosity=clVerbosity, replaceDuplicates=replaceDuplicates, cellsPerCol=cellsPerCol, maxStoredPatterns=maxStoredPatterns ) # Initialize internal structures self.outputProbabilitiesByDist = outputProbabilitiesByDist self.learningMode = True self.inferenceMode = False self._epoch = 0 self.acceptanceProbability = acceptanceProbability self._rgen = Random(seed) self.confusion = numpy.zeros((1, 1)) self.keepAllDistances = False self._protoScoreCount = 0 self._useAuxiliary = useAuxiliary self._justUseAuxiliary = justUseAuxiliary # Sphering normalization self._doSphering = doSphering self._normOffset = None self._normScale = None self._samples = None self._labels = None # Debugging self.verbosity = clVerbosity # Boolean controlling whether or not the # region should perform partitionID-based # self-validation during the finishLearning() # step. self.doSelfValidation = doSelfValidation # Taps self._tapFileIn = None self._tapFileOut = None self._initEphemerals() self.maxStoredPatterns = maxStoredPatterns self.maxCategoryCount = maxCategoryCount self._bestPrototypeIndexCount = bestPrototypeIndexCount def _getEphemeralAttributes(self): """ List of attributes to not save with serialized state. """ return ['_firstComputeCall', '_accuracy', '_protoScores', '_categoryDistances'] def _initEphemerals(self): """ Initialize attributes that are not saved with the checkpoint. 
""" self._firstComputeCall = True self._accuracy = None self._protoScores = None self._categoryDistances = None self._knn = KNNClassifier.KNNClassifier(**self.knnParams) for x in ('_partitions', '_useAuxialiary', '_doSphering', '_scanInfo', '_protoScores', 'doSelfValidation'): if not hasattr(self, x): setattr(self, x, None) def __setstate__(self, state): """Set state from serialized state.""" if 'version' not in state: self.__dict__.update(state) elif state['version'] == 1: knnState = state['_knn_state'] del state['_knn_state'] self.__dict__.update(state) self._initEphemerals() self._knn.__setstate__(knnState) else: raise RuntimeError("Invalid KNNClassifierRegion version for __setstate__") # Set to current version self.version = KNNClassifierRegion.__VERSION__ def __getstate__(self): """Get serializable state.""" state = self.__dict__.copy() state['_knn_state'] = self._knn.__getstate__() del state['_knn'] for field in self._getEphemeralAttributes(): del state[field] return state def initialize(self, dims, splitterMaps): assert tuple(dims) == (1,) * len(dims) def _getActiveOutputCount(self): if self._knn._categoryList: return int(max(self._knn._categoryList)+1) else: return 0 activeOutputCount = property(fget=_getActiveOutputCount) def _getSeenCategoryCount(self): return len(set(self._knn._categoryList)) categoryCount = property(fget=_getSeenCategoryCount) def _getPatternMatrix(self): if self._knn._M is not None: return self._knn._M else: return self._knn._Memory def _getAccuracy(self): n = self.confusion.shape[0] assert n == self.confusion.shape[1], "Confusion matrix is non-square." return self.confusion[range(n), range(n)].sum(), self.confusion.sum() accuracy = property(fget=_getAccuracy) def clear(self): self._knn.clear() def getAlgorithmInstance(self): """Returns instance of the underlying KNNClassifier algorithm object.""" return self._knn def getParameter(self, name, index=-1): """ Get the value of the parameter. @param name -- the name of the parameter to retrieve, as defined by the Node Spec. """ if name == "patternCount": return self._knn._numPatterns elif name == "patternMatrix": return self._getPatternMatrix() elif name == "k": return self._knn.k elif name == "distanceNorm": return self._knn.distanceNorm elif name == "distanceMethod": return self._knn.distanceMethod elif name == "distThreshold": return self._knn.distThreshold elif name == "inputThresh": return self._knn.binarizationThreshold elif name == "doBinarization": return self._knn.doBinarization elif name == "useSparseMemory": return self._knn.useSparseMemory elif name == "sparseThreshold": return self._knn.sparseThreshold elif name == "winnerCount": return self._knn.numWinners elif name == "relativeThreshold": return self._knn.relativeThreshold elif name == "SVDSampleCount": v = self._knn.numSVDSamples return v if v is not None else 0 elif name == "SVDDimCount": v = self._knn.numSVDDims return v if v is not None else 0 elif name == "fractionOfMax": v = self._knn.fractionOfMax return v if v is not None else 0 elif name == "useAuxiliary": return self._useAuxiliary elif name == "justUseAuxiliary": return self._justUseAuxiliary elif name == "doSphering": return self._doSphering elif name == "cellsPerCol": return self._knn.cellsPerCol elif name == "maxStoredPatterns": return self.maxStoredPatterns elif name == 'categoryRecencyList': return self._knn._categoryRecencyList else: # If any spec parameter name is the same as an attribute, this call # will get it automatically, e.g. 
self.learningMode return PyRegion.getParameter(self, name, index) def setParameter(self, name, index, value): """ Set the value of the parameter. @param name -- the name of the parameter to update, as defined by the Node Spec. @param value -- the value to which the parameter is to be set. """ if name == "learningMode": if int(value) and not self.learningMode: self._restartLearning() self.learningMode = bool(int(value)) self._epoch = 0 elif name == "inferenceMode": self._epoch = 0 if int(value) and not self.inferenceMode: self._finishLearning() self.inferenceMode = bool(int(value)) elif name == "distanceNorm": self._knn.distanceNorm = value elif name == "distanceMethod": self._knn.distanceMethod = value elif name == "keepAllDistances": self.keepAllDistances = bool(value) if not self.keepAllDistances: # Discard all distances except the latest if self._protoScores is not None and self._protoScores.shape[0] > 1: self._protoScores = self._protoScores[-1,:] if self._protoScores is not None: self._protoScoreCount = 1 else: self._protoScoreCount = 0 elif name == "clVerbosity": self.verbosity = value self._knn.verbosity = value elif name == "doSelfValidation": self.doSelfValidation = value else: return PyRegion.setParameter(self, name, index, value) def reset(self): self.confusion = numpy.zeros((1, 1)) def doInference(self, activeInput): """Explicitly run inference on a vector that is passed in and return the category id. Useful for debugging.""" prediction, inference, allScores = self._knn.infer(activeInput) return inference def enableTap(self, tapPath): """ Begin writing output tap files. @param tapPath -- base name of the output tap files to write. """ self._tapFileIn = open(tapPath + '.in', 'w') self._tapFileOut = open(tapPath + '.out', 'w') def disableTap(self): """Disable writing of output tap files. """ if self._tapFileIn is not None: self._tapFileIn.close() self._tapFileIn = None if self._tapFileOut is not None: self._tapFileOut.close() self._tapFileOut = None def handleLogInput(self, inputs): """Write inputs to output tap file.""" if self._tapFileIn is not None: for input in inputs: for k in range(len(input)): print >> self._tapFileIn, input[k], print >> self._tapFileIn def handleLogOutput(self, output): """Write outputs to output tap file.""" #raise Exception('MULTI-LINE DUMMY\nMULTI-LINE DUMMY') if self._tapFileOut is not None: for k in range(len(output)): print >> self._tapFileOut, output[k], print >> self._tapFileOut def _storeSample(self, inputVector, trueCatIndex, partition=0): """ Store a training sample and associated category label """ # If this is the first sample, then allocate a numpy array # of the appropriate size in which to store all samples. if self._samples is None: self._samples = numpy.zeros((0, len(inputVector)), dtype=RealNumpyDType) assert self._labels is None self._labels = [] # Add the sample vector and category lable self._samples = numpy.concatenate((self._samples, numpy.atleast_2d(inputVector)), axis=0) self._labels += [trueCatIndex] # Add the parition ID if self._partitions is None: self._partitions = [] if partition is None: partition = 0 self._partitions += [partition] def compute(self, inputs, outputs): """ Process one input sample. This method is called by the runtime engine. NOTE: the number of input categories may vary, but the array size is fixed to the max number of categories allowed (by a lower region), so "unused" indices of the input category array are filled with -1s. 
TODO: confusion matrix does not support multi-label classification """ #raise Exception('MULTI-LINE DUMMY\nMULTI-LINE DUMMY') # For backward compatibility if self._useAuxiliary is None: self._useAuxiliary = False # If this is the first call, print any potential warning messages if self._firstComputeCall: self._firstComputeCall = False if self._useAuxiliary: #print "\n Auxiliary input stream from Image Sensor enabled." if self._justUseAuxiliary: print " Warning: You have chosen to ignore the image data and instead just use the auxiliary data stream." # Format inputs #childInputs = [x.wvector(0) for x in inputs["bottomUpIn"]] #inputVector = numpy.concatenate([x.array() for x in childInputs]) inputVector = inputs['bottomUpIn'] # Look for auxiliary input if self._useAuxiliary: #auxVector = inputs['auxDataIn'][0].wvector(0).array() auxVector = inputs['auxDataIn'] if auxVector.dtype != numpy.float32: raise RuntimeError, "KNNClassifierRegion expects numpy.float32 for the auxiliary data vector" if self._justUseAuxiliary: #inputVector = inputs['auxDataIn'][0].wvector(0).array() inputVector = inputs['auxDataIn'] else: #inputVector = numpy.concatenate([inputVector, inputs['auxDataIn'][0].wvector(0).array()]) inputVector = numpy.concatenate([inputVector, inputs['auxDataIn']]) # Logging #self.handleLogInput(childInputs) self.handleLogInput([inputVector]) # Read the category. assert "categoryIn" in inputs, "No linked category input." categories = inputs['categoryIn'] # Read the partition ID. if "partitionIn" in inputs: assert len(inputs["partitionIn"]) == 1, "Must have exactly one link to partition input." #partInput = inputs["partitionIn"][0].wvector() partInput = inputs['partitionIn'] assert len(partInput) == 1, "Partition input element count must be exactly 1." 
partition = int(partInput[0]) else: partition = None # --------------------------------------------------------------------- # Inference (can be done simultaneously with learning) if self.inferenceMode: categoriesOut = outputs['categoriesOut'] probabilitiesOut = outputs['categoryProbabilitiesOut'] # If we are sphering, then apply normalization if self._doSphering: inputVector = (inputVector + self._normOffset) * self._normScale nPrototypes = 0 if "bestPrototypeIndices" in outputs: #bestPrototypeIndicesOut = outputs["bestPrototypeIndices"].wvector() bestPrototypeIndicesOut = outputs["bestPrototypeIndices"] nPrototypes = len(bestPrototypeIndicesOut) winner, inference, protoScores, categoryDistances = \ self._knn.infer(inputVector, partitionId=partition) if not self.keepAllDistances: self._protoScores = protoScores else: # Keep all prototype scores in an array if self._protoScores is None: self._protoScores = numpy.zeros((1, protoScores.shape[0]), protoScores.dtype) self._protoScores[0,:] = protoScores#.reshape(1, protoScores.shape[0]) self._protoScoreCount = 1 else: if self._protoScoreCount == self._protoScores.shape[0]: # Double the size of the array newProtoScores = numpy.zeros((self._protoScores.shape[0] * 2, self._protoScores.shape[1]), self._protoScores.dtype) newProtoScores[:self._protoScores.shape[0],:] = self._protoScores self._protoScores = newProtoScores # Store the new prototype score self._protoScores[self._protoScoreCount,:] = protoScores self._protoScoreCount += 1 self._categoryDistances = categoryDistances # -------------------------------------------------------------------- # Compute the probability of each category if self.outputProbabilitiesByDist: scores = 1.0 - self._categoryDistances else: scores = inference # Probability is simply the scores/scores.sum() total = scores.sum() if total == 0: numScores = len(scores) probabilities = numpy.ones(numScores) / numScores else: probabilities = scores / total #print "probabilities:", probabilities #import pdb; pdb.set_trace() # ------------------------------------------------------------------- # Fill the output vectors with our results nout = min(len(categoriesOut), len(inference)) categoriesOut.fill(0) categoriesOut[0:nout] = inference[0:nout] probabilitiesOut.fill(0) probabilitiesOut[0:nout] = probabilities[0:nout] if self.verbosity >= 1: print "KNNRegion: categoriesOut: ", categoriesOut[0:nout] print "KNNRegion: probabilitiesOut: ", probabilitiesOut[0:nout] if self._scanInfo is not None: self._scanResults = [tuple(inference[:nout])] # Update the stored confusion matrix. 
for category in categories: if category >= 0: dims = max(category+1, len(inference)) oldDims = len(self.confusion) if oldDims < dims: confusion = numpy.zeros((dims, dims)) confusion[0:oldDims, 0:oldDims] = self.confusion self.confusion = confusion self.confusion[inference.argmax(), category] += 1 # Calculate the best prototype indices if nPrototypes > 1: bestPrototypeIndicesOut.fill(0) if categoryDistances is not None: indices = categoryDistances.argsort() nout = min(len(indices), nPrototypes) bestPrototypeIndicesOut[0:nout] = indices[0:nout] elif nPrototypes == 1: if (categoryDistances is not None) and len(categoryDistances): bestPrototypeIndicesOut[0] = categoryDistances.argmin() else: bestPrototypeIndicesOut[0] = 0 # Logging self.handleLogOutput(inference) # --------------------------------------------------------------------- # Learning mode if self.learningMode: if (self.acceptanceProbability < 1.0) and \ (self._rgen.getReal64() > self.acceptanceProbability): pass else: # Accept the input for category in categories: if category >= 0: # category values of -1 are to be skipped (they are non-categories) if self._doSphering: # If we are sphering, then we can't provide the data to the KNN # library until we have computed per-dimension normalization # constants. So instead, we'll just store each training sample. self._storeSample(inputVector, category, partition) else: # Pass the raw training sample directly to the KNN library. self._knn.learn(inputVector, category, partition) self._epoch += 1 def getCategoryList(self): """ Public API for returning the category list. This is a required API of the NearestNeighbor inspector. It returns an array which has one entry per stored prototype. The value of the entry is the category # of that stored prototype. """ return self._knn._categoryList def removeCategory(self, categoryToRemove): return self._knn.removeCategory(categoryToRemove) def getLatestDistances(self): """ Public API for returning the full scores (distance to each prototype) from the last compute() inference call. This is a required API of the NearestNeighbor inspector. It returns an array which has one entry per stored prototype. The value of the entry is the distance of the most recently inferred input from the stored prototype. """ if self._protoScores is not None: if self.keepAllDistances: return self._protoScores[self._protoScoreCount - 1,:] else: return self._protoScores else: return None def getAllDistances(self): """ Return all the prototype distances from all computes available. Like getLatestDistances, but returns all the scores if more than one set is available. getLatestDistances will always just return one set of scores. """ if self._protoScores is None: return None return self._protoScores[:self._protoScoreCount, :] def calculateProbabilities(self): # Get the scores, from 0 to 1 scores = 1.0 - self._categoryDistances # Probability is simply the score/scores.sum() total = scores.sum() if total == 0: numScores = len(scores) return numpy.ones(numScores) / numScores return scores / total def _restartLearning(self): """ Currently, we allow learning mode to be "re-started" after being ended, but only if PCA and sphering (if any) operations have already been completed (for the sake of simplicity). """ self._knn.restartLearning() def _finishLearning(self): """Complete the learning phase. 
Finalizes sphering (if enabled) and the underlying KNN classifier, and optionally runs partitionId-based leave-one-out self-validation. """ if self._doSphering: self._finishSphering() self._knn.finishLearning() # Compute leave-one-out validation accuracy if # we actually received non-trivial partition info self._accuracy = None if self.doSelfValidation: #partitions = self._knn._partitionIdList #if len(set(partitions)) > 1: if self._knn._partitionIdArray is not None: numSamples, numCorrect = self._knn.leaveOneOutTest() if numSamples: self._accuracy = float(numCorrect) / float(numSamples) print "Leave-one-out validation: %d of %d correct ==> %.3f%%" % \ (numCorrect, numSamples, self._accuracy * 100.0) def _finishSphering(self): """ Compute normalization constants for each feature dimension based on the collected training samples. Then normalize our training samples using these constants (so that each input dimension has mean and variance of zero and one, respectively.) Then feed these "sphered" training samples into the underlying KNN model. """ # If we are sphering our data, we need to compute the # per-dimension normalization constants # First normalize the means (to zero) self._normOffset = self._samples.mean(axis=0) * -1.0 self._samples += self._normOffset # Now normalize the variances (to one). However, we need to be # careful because the variance could conceivably be zero for one # or more dimensions. variance = self._samples.var(axis=0) variance[numpy.where(variance == 0.0)] = 1.0 self._normScale = 1.0 / numpy.sqrt(variance) self._samples *= self._normScale # Now feed each "sphered" sample into the KNN library for sampleIndex in range(len(self._labels)): self._knn.learn(self._samples[sampleIndex], self._labels[sampleIndex], self._partitions[sampleIndex]) def _arraysToLists(self, samplesArray, labelsArray): labelsList = list(labelsArray) samplesList = [[float(y) for y in x] for x in [list(x) for x in samplesArray]] return samplesList, labelsList def getOutputElementCount(self, name): """This method will be called only when the node is used in nuPIC 2""" if name == 'categoriesOut': return self.maxCategoryCount elif name == 'categoryProbabilitiesOut': return self.maxCategoryCount elif name == 'bestPrototypeIndices': return self._bestPrototypeIndexCount if self._bestPrototypeIndexCount else 0 else: raise Exception('Unknown output: ' + name)
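# -----------------------------------------------------------------------------
# Hedged usage sketch (added for illustration; not part of the original region
# code). It shows how the KNNClassifierRegion above can be driven directly with
# plain dicts instead of the full network API. The toy vectors, labels, and
# maxCategoryCount are illustrative assumptions, not values from the original
# file; it assumes numpy and the KNNClassifier module are importable as in the
# surrounding code.
def _exampleKNNClassifierRegionUsage():
  region = KNNClassifierRegion(maxCategoryCount=3, k=1)

  # Hypothetical training data: three 4-element vectors with category labels.
  trainingData = [
    (numpy.array([1, 0, 0, 0], dtype=numpy.float32), 0),
    (numpy.array([0, 1, 0, 0], dtype=numpy.float32), 1),
    (numpy.array([0, 0, 1, 1], dtype=numpy.float32), 2),
  ]
  outputs = dict(categoriesOut=numpy.zeros(3),
                 categoryProbabilitiesOut=numpy.zeros(3))

  # Learning pass: learningMode is True and inferenceMode is False by default.
  for vector, label in trainingData:
    region.compute(dict(bottomUpIn=vector, categoryIn=numpy.array([label])),
                   outputs)

  # Switch to inference; enabling inferenceMode triggers _finishLearning().
  region.setParameter("learningMode", 0, 0)
  region.setParameter("inferenceMode", 0, 1)
  region.compute(dict(bottomUpIn=trainingData[1][0],
                      categoryIn=numpy.array([-1])),  # -1 means "no category"
                 outputs)
  print "predicted category:", outputs['categoriesOut'].argmax()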
class SMSequences(object): """ Class generates sensorimotor sequences """ def __init__( self, sensoryInputElements, spatialConfig, sensoryInputElementsPool=list("ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz0123456789"), minDisplacement=1, maxDisplacement=1, numActiveBitsSensoryInput=9, numActiveBitsMotorInput=9, seed=42, verbosity=False, useRandomEncoder=False, ): """ @param sensoryInputElements (list) Strings or numbers representing the sensory elements that exist in your world. Elements can be repeated if multiple of the same exist. @param spatialConfig (numpy.array) Array of size: (1, len(sensoryInputElements), dimension). It has a coordinate for every element in sensoryInputElements. @param sensoryInputElementsPool (list) List of strings representing a readable version of all possible sensory elements in this world. Elements don't need to be in any order and there should be no duplicates. By default this contains the set of alphanumeric characters. @param maxDisplacement (int) Maximum `distance` for a motor command. Distance is defined by the largest difference along any coordinate dimension. @param minDisplacement (int) Minimum `distance` for a motor command. Distance is defined by the largest difference along any coordinate dimension. @param numActiveBitsSensoryInput (int) Number of active bits for each sensory input. @param numActiveBitsMotorInput (int) Number of active bits for each dimension of the motor input. @param seed (int) Random seed for nupic.bindings.Random. @param verbosity (int) Verbosity @param useRandomEncoder (boolean) if True, use the random encoder SDRCategoryEncoder. If False, use CategoryEncoder. CategoryEncoder encodes categories using contiguous non-overlapping bits for each category, which makes it easier to debug. """ # --------------------------------------------------------------------------------- # Store creation parameters self.sensoryInputElements = sensoryInputElements self.sensoryInputElementsPool = sensoryInputElementsPool self.spatialConfig = spatialConfig.astype(int) self.spatialLength = len(spatialConfig) self.maxDisplacement = maxDisplacement self.minDisplacement = minDisplacement self.numActiveBitsSensoryInput = numActiveBitsSensoryInput self.numActiveBitsMotorInput = numActiveBitsMotorInput self.verbosity = verbosity self.seed = seed self.initialize(useRandomEncoder) def initialize(self, useRandomEncoder): """ Initialize the various data structures. 
""" self.setRandomSeed(self.seed) self.dim = numpy.shape(self.spatialConfig)[-1] self.spatialMap = dict(zip(map(tuple, list(self.spatialConfig)), self.sensoryInputElements)) self.lengthMotorInput1D = (2 * self.maxDisplacement + 1) * self.numActiveBitsMotorInput uniqueSensoryElements = list(set(self.sensoryInputElementsPool)) if useRandomEncoder: self.sensoryEncoder = SDRCategoryEncoder( n=1024, w=self.numActiveBitsSensoryInput, categoryList=uniqueSensoryElements, forced=True ) self.lengthSensoryInput = self.sensoryEncoder.getWidth() else: self.lengthSensoryInput = (len(self.sensoryInputElementsPool) + 1) * self.numActiveBitsSensoryInput self.sensoryEncoder = CategoryEncoder( w=self.numActiveBitsSensoryInput, categoryList=uniqueSensoryElements, forced=True ) motorEncoder1D = ScalarEncoder( n=self.lengthMotorInput1D, w=self.numActiveBitsMotorInput, minval=-self.maxDisplacement, maxval=self.maxDisplacement, clipInput=True, forced=True, ) self.motorEncoder = VectorEncoder(length=self.dim, encoder=motorEncoder1D) def generateSensorimotorSequence(self, sequenceLength): """ Generate sensorimotor sequences of length sequenceLength. @param sequenceLength (int) Length of the sensorimotor sequence. @return (tuple) Contains: sensorySequence (list) Encoded sensory input for whole sequence. motorSequence (list) Encoded motor input for whole sequence. sensorimotorSequence (list) Encoder sensorimotor input for whole sequence. This is useful when you want to give external input to temporal memory. """ motorSequence = [] sensorySequence = [] sensorimotorSequence = [] currentEyeLoc = self.nupicRandomChoice(self.spatialConfig) for i in xrange(sequenceLength): currentSensoryInput = self.spatialMap[tuple(currentEyeLoc)] nextEyeLoc, currentEyeV = self.getNextEyeLocation(currentEyeLoc) if self.verbosity: print "sensory input = ", currentSensoryInput, "eye location = ", currentEyeLoc, " motor command = ", currentEyeV sensoryInput = self.encodeSensoryInput(currentSensoryInput) motorInput = self.encodeMotorInput(list(currentEyeV)) sensorimotorInput = numpy.concatenate((sensoryInput, motorInput)) sensorySequence.append(sensoryInput) motorSequence.append(motorInput) sensorimotorSequence.append(sensorimotorInput) currentEyeLoc = nextEyeLoc return (sensorySequence, motorSequence, sensorimotorSequence) def encodeSensorimotorSequence(self, eyeLocs): """ Encode sensorimotor sequence given the eye movements. Sequence will have length len(eyeLocs) - 1 because only the differences of eye locations can be used to encoder motor commands. @param eyeLocs (list) Numpy coordinates describing where the eye is looking. @return (tuple) Contains: sensorySequence (list) Encoded sensory input for whole sequence. motorSequence (list) Encoded motor input for whole sequence. sensorimotorSequence (list) Encoder sensorimotor input for whole sequence. This is useful when you want to give external input to temporal memory. 
""" sequenceLength = len(eyeLocs) - 1 motorSequence = [] sensorySequence = [] sensorimotorSequence = [] for i in xrange(sequenceLength): currentEyeLoc = eyeLocs[i] nextEyeLoc = eyeLocs[i + 1] currentSensoryInput = self.spatialMap[currentEyeLoc] currentEyeV = nextEyeLoc - currentEyeLoc if self.verbosity: print "sensory input = ", currentSensoryInput, "eye location = ", currentEyeLoc, " motor command = ", currentEyeV sensoryInput = self.encodeSensoryInput(currentSensoryInput) motorInput = self.encodeMotorInput(list(currentEyeV)) sensorimotorInput = numpy.concatenate((sensoryInput, motorInput)) sensorySequence.append(sensoryInput) motorSequence.append(motorInput) sensorimotorSequence.append(sensorimotorInput) return (sensorySequence, motorSequence, sensorimotorSequence) def getNextEyeLocation(self, currentEyeLoc): """ Generate next eye location based on current eye location. @param currentEyeLoc (numpy.array) Current coordinate describing the eye location in the world. @return (tuple) Contains: nextEyeLoc (numpy.array) Coordinate of the next eye location. eyeDiff (numpy.array) Vector describing change from currentEyeLoc to nextEyeLoc. """ possibleEyeLocs = [] for loc in self.spatialConfig: shift = abs(max(loc - currentEyeLoc)) if self.minDisplacement <= shift <= self.maxDisplacement: possibleEyeLocs.append(loc) nextEyeLoc = self.nupicRandomChoice(possibleEyeLocs) eyeDiff = nextEyeLoc - currentEyeLoc return nextEyeLoc, eyeDiff def setRandomSeed(self, seed): """ Reset the nupic random generator. This is necessary to reset random seed to generate new sequences. @param seed (int) Seed for nupic.bindings.Random. """ self.seed = seed self._random = Random() self._random.setSeed(seed) def nupicRandomChoice(self, array): """ Chooses a random element from an array using the nupic random number generator. @param array (list or numpy.array) Array to choose random element from. @return (element) Element chosen at random. """ return array[self._random.getUInt32(len(array))] def encodeMotorInput(self, motorInput): """ Encode motor command to bit vector. @param motorInput (1D numpy.array) Motor command to be encoded. @return (1D numpy.array) Encoded motor command. """ if not hasattr(motorInput, "__iter__"): motorInput = list([motorInput]) return self.motorEncoder.encode(motorInput) def decodeMotorInput(self, motorInputPattern): """ Decode motor command from bit vector. @param motorInputPattern (1D numpy.array) Encoded motor command. @return (1D numpy.array) Decoded motor command. """ key = self.motorEncoder.decode(motorInputPattern)[0].keys()[0] motorCommand = self.motorEncoder.decode(motorInputPattern)[0][key][1][0] return motorCommand def encodeSensoryInput(self, sensoryInputElement): """ Encode sensory input to bit vector @param sensoryElement (1D numpy.array) Sensory element to be encoded. @return (1D numpy.array) Encoded sensory element. """ return self.sensoryEncoder.encode(sensoryInputElement) def decodeSensoryInput(self, sensoryInputPattern): """ Decode sensory input from bit vector. @param sensoryInputPattern (1D numpy.array) Encoded sensory element. @return (1D numpy.array) Decoded sensory element. """ return self.sensoryEncoder.decode(sensoryInputPattern)[0]["category"][1] def printSensoryCodingScheme(self): """ Print sensory inputs along with their encoded versions. 
""" print "\nsensory coding scheme: " for loc in self.spatialConfig: sensoryElement = self.spatialMap[tuple(loc)] print sensoryElement, "%s : " % loc, printSequence(self.encodeSensoryInput(sensoryElement)) def printMotorCodingScheme(self): """ Print motor commands (displacement vector) along with their encoded versions. """ print "\nmotor coding scheme: " self.build(self.dim, []) def build(self, n, vec): """ Recursive function to help print motor coding scheme. """ for i in range(-self.maxDisplacement, self.maxDisplacement + 1): next = vec + [i] if n == 1: print "{:>5}\t".format(next), " = ", printSequence(self.encodeMotorInput(next)) else: self.build(n - 1, next)
class TemporalMemory(object): """ Class implementing the Temporal Memory algorithm. """ def __init__(self, columnDimensions=(2048,), cellsPerColumn=32, activationThreshold=13, initialPermanence=0.21, connectedPermanence=0.50, minThreshold=10, maxNewSynapseCount=20, permanenceIncrement=0.10, permanenceDecrement=0.10, predictedSegmentDecrement=0.0, maxSegmentsPerCell=255, maxSynapsesPerSegment=255, seed=42, **kwargs): """ @param columnDimensions (list) Dimensions of the column space @param cellsPerColumn (int) Number of cells per column @param activationThreshold (int) If the number of active connected synapses on a segment is at least this threshold, the segment is said to be active. @param initialPermanence (float) Initial permanence of a new synapse. @param connectedPermanence (float) If the permanence value for a synapse is greater than this value, it is said to be connected. @param minThreshold (int) If the number of synapses active on a segment is at least this threshold, it is selected as the best matching cell in a bursting column. @param maxNewSynapseCount (int) The maximum number of synapses added to a segment during learning. @param permanenceIncrement (float) Amount by which permanences of synapses are incremented during learning. @param permanenceDecrement (float) Amount by which permanences of synapses are decremented during learning. @param predictedSegmentDecrement (float) Amount by which active permanences of synapses of previously predicted but inactive segments are decremented. @param seed (int) Seed for the random number generator. Notes: predictedSegmentDecrement: A good value is just a bit larger than (the column-level sparsity * permanenceIncrement). So, if column-level sparsity is 2% and permanenceIncrement is 0.01, this parameter should be something like 4% * 0.01 = 0.0004). """ # Error checking if not len(columnDimensions): raise ValueError("Number of column dimensions must be greater than 0") if not cellsPerColumn > 0: raise ValueError("Number of cells per column must be greater than 0") # TODO: Validate all parameters (and add validation tests) # Save member variables self.columnDimensions = columnDimensions self.cellsPerColumn = cellsPerColumn self.activationThreshold = activationThreshold self.initialPermanence = initialPermanence self.connectedPermanence = connectedPermanence self.minThreshold = minThreshold self.maxNewSynapseCount = maxNewSynapseCount self.permanenceIncrement = permanenceIncrement self.permanenceDecrement = permanenceDecrement self.predictedSegmentDecrement = predictedSegmentDecrement # Initialize member variables self.connections = Connections(self.numberOfCells(), maxSegmentsPerCell=maxSegmentsPerCell, maxSynapsesPerSegment=maxSynapsesPerSegment) self._random = Random(seed) self.activeCells = set() self.predictiveCells = set() self.activeSegments = set() self.winnerCells = set() self.matchingSegments = set() self.matchingCells = set() # ============================== # Main functions # ============================== def compute(self, activeColumns, learn=True): """ Feeds input record through TM, performing inference and learning. 
@param activeColumns (set) Indices of active columns @param learn (bool) Whether or not learning is enabled Updates member variables: - `activeCells` (set) - `winnerCells` (set) - `activeSegments` (set) - `predictiveCells` (set) - `matchingSegments`(set) - `matchingCells` (set) """ prevPredictiveCells = self.predictiveCells prevActiveSegments = self.activeSegments prevActiveCells = self.activeCells prevWinnerCells = self.winnerCells prevMatchingSegments = self.matchingSegments prevMatchingCells = self.matchingCells activeCells = set() winnerCells = set() (_activeCells, _winnerCells, predictedActiveColumns, predictedInactiveCells) = self.activateCorrectlyPredictiveCells( prevPredictiveCells, prevMatchingCells, activeColumns) activeCells.update(_activeCells) winnerCells.update(_winnerCells) (_activeCells, _winnerCells, learningSegments) = self.burstColumns(activeColumns, predictedActiveColumns, prevActiveCells, prevWinnerCells, self.connections) activeCells.update(_activeCells) winnerCells.update(_winnerCells) if learn: self.learnOnSegments(prevActiveSegments, learningSegments, prevActiveCells, winnerCells, prevWinnerCells, self.connections, predictedInactiveCells, prevMatchingSegments) (activeSegments, predictiveCells, matchingSegments, matchingCells) = self.computePredictiveCells(activeCells, self.connections) self.activeCells = activeCells self.winnerCells = winnerCells self.activeSegments = activeSegments self.predictiveCells = predictiveCells self.matchingSegments = matchingSegments self.matchingCells = matchingCells def reset(self): """ Indicates the start of a new sequence. Resets sequence state of the TM. """ self.activeCells = set() self.predictiveCells = set() self.activeSegments = set() self.winnerCells = set() # ============================== # Phases # ============================== def activateCorrectlyPredictiveCells(self, prevPredictiveCells, prevMatchingCells, activeColumns): """ Phase 1: Activate the correctly predictive cells. Pseudocode: - for each prev predictive cell - if in active column - mark it as active - mark it as winner cell - mark column as predicted => active - if not in active column - mark it as an predicted but inactive cell @param prevPredictiveCells (set) Indices of predictive cells in `t-1` @param activeColumns (set) Indices of active columns in `t` @return (tuple) Contains: `activeCells` (set), `winnerCells` (set), `predictedActiveColumns` (set), `predictedInactiveCells` (set) """ activeCells = set() winnerCells = set() predictedActiveColumns = set() predictedInactiveCells = set() for cell in prevPredictiveCells: column = self.columnForCell(cell) if column in activeColumns: activeCells.add(cell) winnerCells.add(cell) predictedActiveColumns.add(column) if self.predictedSegmentDecrement > 0: for cell in prevMatchingCells: column = self.columnForCell(cell) if column not in activeColumns: predictedInactiveCells.add(cell) return (activeCells, winnerCells, predictedActiveColumns, predictedInactiveCells) def burstColumns(self, activeColumns, predictedActiveColumns, prevActiveCells, prevWinnerCells, connections): """ Phase 2: Burst unpredicted columns. 
Pseudocode: - for each unpredicted active column - mark all cells as active - mark the best matching cell as winner cell - (learning) - if it has no matching segment - (optimization) if there are prev winner cells - add a segment to it - mark the segment as learning @param activeColumns (set) Indices of active columns in `t` @param predictedActiveColumns (set) Indices of predicted => active columns in `t` @param prevActiveCells (set) Indices of active cells in `t-1` @param prevWinnerCells (set) Indices of winner cells in `t-1` @param connections (Connections) Connectivity of layer @return (tuple) Contains: `activeCells` (set), `winnerCells` (set), `learningSegments` (set) """ activeCells = set() winnerCells = set() learningSegments = set() unpredictedActiveColumns = activeColumns - predictedActiveColumns # Sort unpredictedActiveColumns before iterating for compatibility with C++ for column in sorted(unpredictedActiveColumns): cells = self.cellsForColumn(column) activeCells.update(cells) (bestCell, bestSegment) = self.bestMatchingCell(cells, prevActiveCells, connections) winnerCells.add(bestCell) if bestSegment is None and len(prevWinnerCells): bestSegment = connections.createSegment(bestCell) if bestSegment is not None: learningSegments.add(bestSegment) return activeCells, winnerCells, learningSegments def learnOnSegments(self, prevActiveSegments, learningSegments, prevActiveCells, winnerCells, prevWinnerCells, connections, predictedInactiveCells, prevMatchingSegments): """ Phase 3: Perform learning by adapting segments. Pseudocode: - (learning) for each prev active or learning segment - if learning segment or from winner cell - strengthen active synapses - weaken inactive synapses - if learning segment - add some synapses to the segment - subsample from prev winner cells - if predictedSegmentDecrement > 0 - for each previously matching segment - if cell is a predicted inactive cell - weaken active synapses but don't touch inactive synapses @param prevActiveSegments (set) Indices of active segments in `t-1` @param learningSegments (set) Indices of learning segments in `t` @param prevActiveCells (set) Indices of active cells in `t-1` @param winnerCells (set) Indices of winner cells in `t` @param prevWinnerCells (set) Indices of winner cells in `t-1` @param connections (Connections) Connectivity of layer @param predictedInactiveCells (set) Indices of predicted inactive cells @param prevMatchingSegments (set) Indices of matching segments in `t-1` """ segments = prevActiveSegments | learningSegments # Sort segments before iterating for compatibility with C++ # Sort with primary key = cell idx, secondary key = segment idx segments = sorted( segments, key=lambda segment: (connections.cellForSegment(segment), segment)) for segment in segments: isLearningSegment = segment in learningSegments isFromWinnerCell = connections.cellForSegment(segment) in winnerCells activeSynapses = self.activeSynapsesForSegment( segment, prevActiveCells, connections) if isLearningSegment or isFromWinnerCell: self.adaptSegment(segment, activeSynapses, connections, self.permanenceIncrement, self.permanenceDecrement) if isLearningSegment: n = self.maxNewSynapseCount - len(activeSynapses) # Fix for NUP #3268 is commented out for now until test failures are # addressed. 
# n = min(self.maxNewSynapseCount, # connections.maxSynapsesPerSegment # - len(connections.synapsesForSegment(segment))) for presynapticCell in self.pickCellsToLearnOn(n, segment, prevWinnerCells, connections): connections.createSynapse(segment, presynapticCell, self.initialPermanence) if self.predictedSegmentDecrement > 0: for segment in prevMatchingSegments: isPredictedInactiveCell = connections.cellForSegment(segment) in predictedInactiveCells activeSynapses = self.activeSynapsesForSegment( segment, prevActiveCells, connections) if isPredictedInactiveCell: self.adaptSegment(segment, activeSynapses, connections, -self.predictedSegmentDecrement, 0.0) def computePredictiveCells(self, activeCells, connections): """ Phase 4: Compute predictive cells due to lateral input on distal dendrites. Pseudocode: - for each distal dendrite segment with activity >= activationThreshold - mark the segment as active - mark the cell as predictive - if predictedSegmentDecrement > 0 - for each distal dendrite segment with unconnected activity >= minThreshold - mark the segment as matching - mark the cell as matching Forward propagates activity from active cells to the synapses that touch them, to determine which synapses are active. @param activeCells (set) Indices of active cells in `t` @param connections (Connections) Connectivity of layer @return (tuple) Contains: `activeSegments` (set), `predictiveCells` (set), `matchingSegments` (set), `matchingCells` (set) """ numActiveConnectedSynapsesForSegment = defaultdict(int) numActiveSynapsesForSegment = defaultdict(int) activeSegments = set() predictiveCells = set() matchingSegments = set() matchingCells = set() for cell in activeCells: for synapseData in connections.synapsesForPresynapticCell(cell).values(): segment = synapseData.segment permanence = synapseData.permanence if permanence >= self.connectedPermanence: numActiveConnectedSynapsesForSegment[segment] += 1 if (numActiveConnectedSynapsesForSegment[segment] >= self.activationThreshold): activeSegments.add(segment) predictiveCells.add(connections.cellForSegment(segment)) if permanence > 0 and self.predictedSegmentDecrement > 0: numActiveSynapsesForSegment[segment] += 1 if numActiveSynapsesForSegment[segment] >= self.minThreshold: matchingSegments.add(segment) matchingCells.add(connections.cellForSegment(segment)) return activeSegments, predictiveCells, matchingSegments, matchingCells # ============================== # Helper functions # ============================== def bestMatchingCell(self, cells, activeCells, connections): """ Gets the cell with the best matching segment (see `TM.bestMatchingSegment`) that has the largest number of active synapses of all best matching segments. If none were found, pick the least used cell (see `TM.leastUsedCell`). 
@param cells (set) Indices of cells @param activeCells (set) Indices of active cells @param connections (Connections) Connectivity of layer @return (tuple) Contains: `cell` (int), `bestSegment` (int) """ maxSynapses = 0 bestCell = None bestSegment = None for cell in cells: segment, numActiveSynapses = self.bestMatchingSegment( cell, activeCells, connections) if segment is not None and numActiveSynapses > maxSynapses: maxSynapses = numActiveSynapses bestCell = cell bestSegment = segment if bestCell is None: bestCell = self.leastUsedCell(cells, connections) return bestCell, bestSegment def bestMatchingSegment(self, cell, activeCells, connections): """ Gets the segment on a cell with the largest number of active synapses, counting all synapses with non-zero permanence (whether or not they are connected). @param cell (int) Cell index @param activeCells (set) Indices of active cells @param connections (Connections) Connectivity of layer @return (tuple) Contains: `segment` (int), `numActiveSynapses` (int) """ maxSynapses = self.minThreshold bestSegment = None bestNumActiveSynapses = None for segment in connections.segmentsForCell(cell): numActiveSynapses = 0 for synapse in connections.synapsesForSegment(segment): synapseData = connections.dataForSynapse(synapse) if ( (synapseData.presynapticCell in activeCells) and synapseData.permanence > 0): numActiveSynapses += 1 if numActiveSynapses >= maxSynapses: maxSynapses = numActiveSynapses bestSegment = segment bestNumActiveSynapses = numActiveSynapses return bestSegment, bestNumActiveSynapses def leastUsedCell(self, cells, connections): """ Gets the cell with the smallest number of segments. Break ties randomly. @param cells (set) Indices of cells @param connections (Connections) Connectivity of layer @return (int) Cell index """ leastUsedCells = set() minNumSegments = float("inf") for cell in cells: numSegments = len(connections.segmentsForCell(cell)) if numSegments < minNumSegments: minNumSegments = numSegments leastUsedCells = set() if numSegments == minNumSegments: leastUsedCells.add(cell) i = self._random.getUInt32(len(leastUsedCells)) return sorted(leastUsedCells)[i] @staticmethod def activeSynapsesForSegment(segment, activeCells, connections): """ Returns the synapses on a segment that are active due to lateral input from active cells. @param segment (int) Segment index @param activeCells (set) Indices of active cells @param connections (Connections) Connectivity of layer @return (set) Indices of active synapses on segment """ synapses = set() for synapse in connections.synapsesForSegment(segment): synapseData = connections.dataForSynapse(synapse) if synapseData.presynapticCell in activeCells: synapses.add(synapse) return synapses def adaptSegment(self, segment, activeSynapses, connections, permanenceIncrement, permanenceDecrement): """ Updates synapses on segment. Strengthens active synapses; weakens inactive synapses. 
@param segment (int) Segment index @param activeSynapses (set) Indices of active synapses @param connections (Connections) Connectivity of layer @param permanenceIncrement (float) Amount to increment active synapses @param permanenceDecrement (float) Amount to decrement inactive synapses """ # Need to copy synapses for segment set below because it will be modified # during iteration by `destroySynapse` for synapse in set(connections.synapsesForSegment(segment)): synapseData = connections.dataForSynapse(synapse) permanence = synapseData.permanence if synapse in activeSynapses: permanence += permanenceIncrement else: permanence -= permanenceDecrement # Keep permanence within min/max bounds permanence = max(0.0, min(1.0, permanence)) if (permanence < EPSILON): connections.destroySynapse(synapse) else: connections.updateSynapsePermanence(synapse, permanence) def pickCellsToLearnOn(self, n, segment, winnerCells, connections): """ Pick cells to form distal connections to. TODO: Respect topology and learningRadius @param n (int) Number of cells to pick @param segment (int) Segment index @param winnerCells (set) Indices of winner cells in `t` @param connections (Connections) Connectivity of layer @return (set) Indices of cells picked """ candidates = set(winnerCells) # Remove cells that are already synapsed on by this segment for synapse in connections.synapsesForSegment(segment): synapseData = connections.dataForSynapse(synapse) presynapticCell = synapseData.presynapticCell if presynapticCell in candidates: candidates.remove(presynapticCell) n = min(n, len(candidates)) candidates = sorted(candidates) cells = set() # Pick n cells randomly for _ in range(n): i = self._random.getUInt32(len(candidates)) cells.add(candidates[i]) del candidates[i] return cells def columnForCell(self, cell): """ Returns the index of the column that a cell belongs to. @param cell (int) Cell index @return (int) Column index """ self._validateCell(cell) return int(cell / self.cellsPerColumn) def cellsForColumn(self, column): """ Returns the indices of cells that belong to a column. @param column (int) Column index @return (set) Cell indices """ self._validateColumn(column) start = self.cellsPerColumn * self.getCellIndex(column) end = start + self.cellsPerColumn return set(xrange(start, end)) def numberOfColumns(self): """ Returns the number of columns in this layer. @return (int) Number of columns """ return reduce(mul, self.columnDimensions, 1) def numberOfCells(self): """ Returns the number of cells in this layer. @return (int) Number of cells """ return self.numberOfColumns() * self.cellsPerColumn def getActiveCells(self): """ Returns the indices of the active cells. @return (list) Indices of active cells. """ return self.getCellIndices(self.activeCells) def getPredictiveCells(self): """ Returns the indices of the predictive cells. @return (list) Indices of predictive cells. """ return self.getCellIndices(self.predictiveCells) def getWinnerCells(self): """ Returns the indices of the winner cells. @return (list) Indices of winner cells. """ return self.getCellIndices(self.winnerCells) def getMatchingCells(self): """ Returns the indices of the matching cells. @return (list) Indices of matching cells. """ return self.getCellIndices(self.matchingCells) def getColumnDimensions(self): """ Returns the dimensions of the columns in the region. @return (tuple) Column dimensions """ return self.columnDimensions def getCellsPerColumn(self): """ Returns the number of cells per column. @return (int) The number of cells per column. 
""" return self.cellsPerColumn def getActivationThreshold(self): """ Returns the activation threshold. @return (int) The activation threshold. """ return self.activationThreshold def setActivationThreshold(self, activationThreshold): """ Sets the activation threshold. @param activationThreshold (int) activation threshold. """ self.activationThreshold = activationThreshold def getInitialPermanence(self): """ Get the initial permanence. @return (float) The initial permanence. """ return self.initialPermanence def setInitialPermanence(self, initialPermanence): """ Sets the initial permanence. @param initialPermanence (float) The initial permanence. """ self.initialPermanence = initialPermanence def getMinThreshold(self): """ Returns the min threshold. @return (int) The min threshold. """ return self.minThreshold def setMinThreshold(self, minThreshold): """ Sets the min threshold. @param minThreshold (int) min threshold. """ self.minThreshold = minThreshold def getMaxNewSynapseCount(self): """ Returns the max new synapse count. @return (int) The max new synapse count. """ return self.maxNewSynapseCount def setMaxNewSynapseCount(self, maxNewSynapseCount): """ Sets the max new synapse count. @param maxNewSynapseCount (int) Max new synapse count. """ self.maxNewSynapseCount = maxNewSynapseCount def getPermanenceIncrement(self): """ Get the permanence increment. @return (float) The permanence increment. """ return self.permanenceIncrement def setPermanenceIncrement(self, permanenceIncrement): """ Sets the permanence increment. @param permanenceIncrement (float) The permanence increment. """ self.permanenceIncrement = permanenceIncrement def getPermanenceDecrement(self): """ Get the permanence decrement. @return (float) The permanence decrement. """ return self.permanenceDecrement def setPermanenceDecrement(self, permanenceDecrement): """ Sets the permanence decrement. @param permanenceDecrement (float) The permanence decrement. """ self.permanenceDecrement = permanenceDecrement def getPredictedSegmentDecrement(self): """ Get the predicted segment decrement. @return (float) The predicted segment decrement. """ return self.predictedSegmentDecrement def setPredictedSegmentDecrement(self, predictedSegmentDecrement): """ Sets the predicted segment decrement. @param predictedSegmentDecrement (float) The predicted segment decrement. """ self.predictedSegmentDecrement = predictedSegmentDecrement def getConnectedPermanence(self): """ Get the connected permanence. @return (float) The connected permanence. """ return self.connectedPermanence def setConnectedPermanence(self, connectedPermanence): """ Sets the connected permanence. @param connectedPermanence (float) The connected permanence. 
""" self.connectedPermanence = connectedPermanence def mapCellsToColumns(self, cells): """ Maps cells to the columns they belong to @param cells (set) Cells @return (dict) Mapping from columns to their cells in `cells` """ cellsForColumns = defaultdict(set) for cell in cells: column = self.columnForCell(cell) cellsForColumns[column].add(cell) return cellsForColumns def write(self, proto): """ Writes serialized data to proto object @param proto (DynamicStructBuilder) Proto object """ proto.columnDimensions = self.columnDimensions proto.cellsPerColumn = self.cellsPerColumn proto.activationThreshold = self.activationThreshold proto.initialPermanence = self.initialPermanence proto.connectedPermanence = self.connectedPermanence proto.minThreshold = self.minThreshold proto.maxNewSynapseCount = self.maxNewSynapseCount proto.permanenceIncrement = self.permanenceIncrement proto.permanenceDecrement = self.permanenceDecrement proto.predictedSegmentDecrement = self.predictedSegmentDecrement self.connections.write(proto.connections) self._random.write(proto.random) proto.activeCells = list(self.activeCells) proto.predictiveCells = list(self.predictiveCells) proto.activeSegments = list(self.activeSegments) proto.winnerCells = list(self.winnerCells) proto.matchingSegments = list(self.matchingSegments) proto.matchingCells = list(self.matchingCells) @classmethod def read(cls, proto): """ Reads deserialized data from proto object @param proto (DynamicStructBuilder) Proto object @return (TemporalMemory) TemporalMemory instance """ tm = object.__new__(cls) tm.columnDimensions = list(proto.columnDimensions) tm.cellsPerColumn = int(proto.cellsPerColumn) tm.activationThreshold = int(proto.activationThreshold) tm.initialPermanence = proto.initialPermanence tm.connectedPermanence = proto.connectedPermanence tm.minThreshold = int(proto.minThreshold) tm.maxNewSynapseCount = int(proto.maxNewSynapseCount) tm.permanenceIncrement = proto.permanenceIncrement tm.permanenceDecrement = proto.permanenceDecrement tm.predictedSegmentDecrement = proto.predictedSegmentDecrement tm.connections = Connections.read(proto.connections) tm._random = Random() tm._random.read(proto.random) tm.activeCells = set([int(x) for x in proto.activeCells]) tm.predictiveCells = set([int(x) for x in proto.predictiveCells]) tm.activeSegments = set([int(x) for x in proto.activeSegments]) tm.winnerCells = set([int(x) for x in proto.winnerCells]) tm.matchingSegments = set([int(x) for x in proto.matchingSegments]) tm.matchingCells = set([int(x) for x in proto.matchingCells]) return tm def __eq__(self, other): """ Equality operator for TemporalMemory instances. Checks if two instances are functionally identical (might have different internal state). 
@param other (TemporalMemory) TemporalMemory instance to compare to """ if self.columnDimensions != other.columnDimensions: return False if self.cellsPerColumn != other.cellsPerColumn: return False if self.activationThreshold != other.activationThreshold: return False if abs(self.initialPermanence - other.initialPermanence) > EPSILON: return False if abs(self.connectedPermanence - other.connectedPermanence) > EPSILON: return False if self.minThreshold != other.minThreshold: return False if self.maxNewSynapseCount != other.maxNewSynapseCount: return False if abs(self.permanenceIncrement - other.permanenceIncrement) > EPSILON: return False if abs(self.permanenceDecrement - other.permanenceDecrement) > EPSILON: return False if abs(self.predictedSegmentDecrement - other.predictedSegmentDecrement) > EPSILON: return False if self.connections != other.connections: return False if self.activeCells != other.activeCells: return False if self.predictiveCells != other.predictiveCells: return False if self.winnerCells != other.winnerCells: return False if self.matchingSegments != other.matchingSegments: return False if self.matchingCells != other.matchingCells: return False return True def __ne__(self, other): """ Non-equality operator for TemporalMemory instances. Checks if two instances are not functionally identical (might have different internal state). @param other (TemporalMemory) TemporalMemory instance to compare to """ return not self.__eq__(other) def _validateColumn(self, column): """ Raises an error if column index is invalid. @param column (int) Column index """ if column >= self.numberOfColumns() or column < 0: raise IndexError("Invalid column") def _validateCell(self, cell): """ Raises an error if cell index is invalid. @param cell (int) Cell index """ if cell >= self.numberOfCells() or cell < 0: raise IndexError("Invalid cell") @classmethod def getCellIndices(cls, cells): return [cls.getCellIndex(c) for c in cells] @staticmethod def getCellIndex(cell): return cell
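# -----------------------------------------------------------------------------
# Hedged usage sketch (added for illustration; not part of the original class):
# a minimal first-order sequence repeated several times so the TemporalMemory
# above begins predicting it. The column indices and the deliberately tiny
# thresholds are illustrative assumptions, not recommended parameter settings.
def _exampleTemporalMemoryUsage():
  tm = TemporalMemory(columnDimensions=(64,),
                      cellsPerColumn=4,
                      activationThreshold=1,
                      minThreshold=1,
                      maxNewSynapseCount=4)

  # A three-step sequence of active-column sets, trained for ten repetitions.
  sequence = [set([0, 1, 2]), set([10, 11, 12]), set([20, 21, 22])]
  for _ in xrange(10):
    for activeColumns in sequence:
      tm.compute(activeColumns, learn=True)
    tm.reset()

  # Present the first element again; with these toy parameters the columns of
  # the second element should now show up as predictive.
  tm.compute(sequence[0], learn=False)
  predictedColumns = set(tm.columnForCell(cell)
                         for cell in tm.getPredictiveCells())
  print "predicted columns:", sorted(predictedColumns)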
class SequenceMachine(object): """ Base sequence machine class. """ def __init__(self, patternMachine, seed=42): """ @param patternMachine (PatternMachine) Pattern machine instance """ # Save member variables self.patternMachine = patternMachine # Initialize member variables self._random = Random(seed) def generateFromNumbers(self, numbers): """ Generate a sequence from a list of numbers. Note: Any `None` in the list of numbers is considered a reset. @param numbers (list) List of numbers @return (list) Generated sequence """ sequence = [] for number in numbers: if number is None: sequence.append(number) else: pattern = self.patternMachine.get(number) sequence.append(pattern) return sequence def addSpatialNoise(self, sequence, amount): """ Add spatial noise to each pattern in the sequence. @param sequence (list) Sequence @param amount (float) Amount of spatial noise @return (list) Sequence with spatial noise """ newSequence = [] for pattern in sequence: if pattern is not None: pattern = self.patternMachine.addNoise(pattern, amount) newSequence.append(pattern) return newSequence def prettyPrintSequence(self, sequence, verbosity=1): """ Pretty print a sequence. @param sequence (list) Sequence @param verbosity (int) Verbosity level @return (string) Pretty-printed text """ text = "" for i in xrange(len(sequence)): pattern = sequence[i] if pattern is None: text += "<reset>" if i < len(sequence) - 1: text += "\n" else: text += self.patternMachine.prettyPrintPattern(pattern, verbosity=verbosity) return text def generateNumbers(self, numSequences, sequenceLength, sharedRange=None): """ @param numSequences (int) Number of sequences to return, separated by None @param sequenceLength (int) Length of each sequence @param sharedRange (tuple) (start index, end index) indicating range of shared subsequence in each sequence (None if no shared subsequences) @return (list) Numbers representing sequences """ numbers = [] if sharedRange: sharedStart, sharedEnd = sharedRange sharedLength = sharedEnd - sharedStart sharedNumbers = range(numSequences * sequenceLength, numSequences * sequenceLength + sharedLength) for i in xrange(numSequences): start = i * sequenceLength newNumbers = np.array(range(start, start + sequenceLength), np.uint32) self._random.shuffle(newNumbers) newNumbers = list(newNumbers) if sharedRange is not None: newNumbers[sharedStart:sharedEnd] = sharedNumbers numbers += newNumbers numbers.append(None) return numbers
class PatternMachine(object): """ Base pattern machine class. """ def __init__(self, n, w, num=100, seed=42): """ @param n (int) Number of available bits in pattern @param w (int/list) Number of on bits in pattern If list, each pattern will have a `w` randomly selected from the list. @param num (int) Number of available patterns """ # Save member variables self._n = n self._w = w self._num = num # Initialize member variables self._random = Random(seed) self._patterns = dict() self._generate() def get(self, number): """ Return a pattern for a number. @param number (int) Number of pattern @return (set) Indices of on bits """ if not number in self._patterns: raise IndexError("Invalid number") return self._patterns[number] def addNoise(self, bits, amount): """ Add noise to pattern. @param bits (set) Indices of on bits @param amount (float) Probability of switching an on bit with a random bit @return (set) Indices of on bits in noisy pattern """ newBits = set() for bit in bits: if self._random.getReal64() < amount: newBits.add(self._random.getUInt32(self._n)) else: newBits.add(bit) return newBits def numbersForBit(self, bit): """ Return the set of pattern numbers that match a bit. @param bit (int) Index of bit @return (set) Indices of numbers """ if bit >= self._n: raise IndexError("Invalid bit") numbers = set() for index, pattern in self._patterns.iteritems(): if bit in pattern: numbers.add(index) return numbers def numberMapForBits(self, bits): """ Return a map from number to matching on bits, for all numbers that match a set of bits. @param bits (set) Indices of bits @return (dict) Mapping from number => on bits. """ numberMap = dict() for bit in bits: numbers = self.numbersForBit(bit) for number in numbers: if not number in numberMap: numberMap[number] = set() numberMap[number].add(bit) return numberMap def prettyPrintPattern(self, bits, verbosity=1): """ Pretty print a pattern. @param bits (set) Indices of on bits @param verbosity (int) Verbosity level @return (string) Pretty-printed text """ numberMap = self.numberMapForBits(bits) text = "" numberList = [] numberItems = sorted(numberMap.iteritems(), key=lambda (number, bits): len(bits), reverse=True) for number, bits in numberItems: if verbosity > 2: strBits = [str(n) for n in bits] numberText = "{0} (bits: {1})".format(number, ",".join(strBits)) elif verbosity > 1: numberText = "{0} ({1} bits)".format(number, len(bits)) else: numberText = str(number) numberList.append(numberText) text += "[{0}]".format(", ".join(numberList)) return text def _generate(self): """ Generates set of random patterns. """ candidates = np.array(range(self._n), np.uint32) for i in xrange(self._num): self._random.shuffle(candidates) pattern = candidates[0:self._getW()] self._patterns[i] = set(pattern) def _getW(self): """ Gets a value of `w` for use in generating a pattern. """ w = self._w if type(w) is list: return w[self._random.getUInt32(len(w))] else: return w
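# Hypothetical usage sketch of PatternMachine above; it assumes the module's
# own imports (numpy as np and the nupic Random binding) are available. Two
# independently generated patterns rarely share bits, while addNoise leaves
# most on bits of a single pattern intact.
pm = PatternMachine(n=1024, w=21, num=10, seed=1)
p0, p1 = pm.get(0), pm.get(1)
print(len(p0 & p1))            # overlap of two random patterns: usually 0 or 1

noisy = pm.addNoise(p0, 0.2)   # each on bit has a 20% chance of being replaced
print(len(p0 & noisy))         # typically close to w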
class SDRCategoryEncoder(Encoder): """ Encodes a list of discrete categories (described by strings), that aren't related to each other. Each encoding is an SDR in which w out of n bits are turned on. Unknown categories are encoded as a single value. Internally we use a :class:`.ScalarEncoder` with a radius of 1, but since we only encode integers, we never get mixture outputs. The :class:`.CategoryEncoder` uses a different method to encode categories :param categoryList: list of discrete string categories, if ``None`` then categories will automatically be added as they are encountered :param forced: if True, skip checks for parameters' settings; see :class:`.ScalarEncoder` for details. (default False) """ def __init__(self, n, w, categoryList = None, name="category", verbosity=0, encoderSeed=1, forced=False): self.n = n self.w = w self._learningEnabled = True # initialize the random number generators self._seed(encoderSeed) if not forced: # -- this is just to catch bad parameter choices if (self.n/self.w) < 2: # w is 50% of total len raise ValueError("Number of ON bits in SDR (%d) must be much smaller than " "the output width (%d)" % (self.w, self.n)) # Another arbitrary cutoff to catch likely mistakes if self.w < 21: raise ValueError("Number of bits in the SDR (%d) must be greater than 2, and should be >= 21, pass forced=True to init() to override this check" % self.w) self._initOverlap() self.verbosity = verbosity self.description = [(name, 0)] self.name = name self.categoryToIndex = dict() self.ncategories = 0 self.categories = list() self.sdrs = None # Always include an 'unknown' category for # edge cases self._addCategory("<UNKNOWN>") if categoryList is None: self._learningEnabled = True else: self._learningEnabled = False for category in categoryList: self._addCategory(category) assert self.ncategories == len(categoryList) + 1 # Not used by this class. Used for decoding (scalarsToStr()) self.encoders = None # This matrix is used for the topDownCompute. We build it the first time # topDownCompute is called self._topDownMappingM = None self._topDownValues = None def _initOverlap(self): # Calculate average overlap of SDRs for decoding # Density is fraction of bits on, and it is also the # probability that any individual bit is on. density = float(self.w) / self.n self.averageOverlap = self.w * density # We can do a better job of calculating the threshold. For now, just # something quick and dirty, which is the midway point between average # and full overlap. averageOverlap is always < w, so the threshold # is always < w. self.thresholdOverlap = int((self.averageOverlap + self.w)/2) # 1.25 -- too sensitive for decode test, so make it less sensitive if self.thresholdOverlap < self.w - 3: self.thresholdOverlap = self.w - 3 def __setstate__(self, state): self.__dict__.update(state) # Initialize self.random as an instance of NupicRandom derived from the # previous numpy random state randomState = state["random"] if isinstance(randomState, numpy.random.mtrand.RandomState): self.random = NupicRandom(randomState.randint(sys.maxint)) def _seed(self, seed=-1): """ Initialize the random seed """ if seed != -1: self.random = NupicRandom(seed) else: self.random = NupicRandom() def getDecoderOutputFieldTypes(self): """ [Encoder class virtual method override] """ # TODO: change back to string meta-type after the decoding logic is fixed # to output strings instead of internal index values. 
return (FieldMetaType.string,) #return (FieldMetaType.integer,) def _addCategory(self, category): if category in self.categories: raise RuntimeError("Attempt to add add encoder category '%s' " "that already exists" % category) if self.sdrs is None: assert self.ncategories == 0 assert len(self.categoryToIndex) == 0 # Initial allocation -- 16 rows self.sdrs = numpy.zeros((16, self.n), dtype='uint8') elif self.ncategories > self.sdrs.shape[0] - 2: # Preallocated sdrs are used up. Double our size currentMax = self.sdrs.shape[0] newsdrs = numpy.zeros((currentMax * 2, self.n), dtype='uint8') newsdrs[0:currentMax] = self.sdrs[0:currentMax] self.sdrs = newsdrs newrep = self._newRep() self.sdrs[self.ncategories] = newrep self.categories.append(category) self.categoryToIndex[category] = self.ncategories self.ncategories += 1 self._topDownMappingM = None def _newRep(self): """Generate a new and unique representation. Returns a numpy array of shape (n,). """ maxAttempts = 1000 for _ in xrange(maxAttempts): foundUnique = True population = numpy.arange(self.n, dtype=numpy.uint32) choices = numpy.arange(self.w, dtype=numpy.uint32) oneBits = sorted(self.random.sample(population, choices)) sdr = numpy.zeros(self.n, dtype='uint8') sdr[oneBits] = 1 for i in xrange(self.ncategories): if (sdr == self.sdrs[i]).all(): foundUnique = False break if foundUnique: break; if not foundUnique: raise RuntimeError("Error, could not find unique pattern %d after " "%d attempts" % (self.ncategories, maxAttempts)) return sdr def getWidth(self): return self.n def getDescription(self): return self.description def getScalars(self, input): """ See method description in base.py """ if input == SENTINEL_VALUE_FOR_MISSING_DATA: return numpy.array([0]) index = self.categoryToIndex.get(input, None) if index is None: if self._learningEnabled: self._addCategory(input) index = self.ncategories - 1 else: # if not found, we encode category 0 index = 0 return numpy.array([index]) def getBucketIndices(self, input): """ See method description in base.py """ # For category encoder, the "scalar" we map to each category is the # bucket index return self.getScalars(input) def encodeIntoArray(self, input, output): if input == SENTINEL_VALUE_FOR_MISSING_DATA: output[0:self.n] = 0 index = 0 else: index = self.getBucketIndices(input)[0] output[0:self.n] = self.sdrs[index,:] if self.verbosity >= 2: print "input:", input, "index:", index, "output:", output print "decoded:", self.decodedToStr(self.decode(output)) def decode(self, encoded, parentFieldName=''): """ See the function description in base.py """ assert (encoded[0:self.n] <= 1.0).all() resultString = "" resultRanges = [] overlaps = (self.sdrs * encoded[0:self.n]).sum(axis=1) if self.verbosity >= 2: print "Overlaps for decoding:" for i in xrange(0, self.ncategories): print "%d %s" % (overlaps[i], self.categories[i]) matchingCategories = (overlaps > self.thresholdOverlap).nonzero()[0] for index in matchingCategories: if resultString != "": resultString += " " resultString += str(self.categories[index]) resultRanges.append([int(index),int(index)]) if parentFieldName != '': fieldName = "%s.%s" % (parentFieldName, self.name) else: fieldName = self.name return ({fieldName: (resultRanges, resultString)}, [fieldName]) def _getTopDownMapping(self): """ Return the interal _topDownMappingM matrix used for handling the bucketInfo() and topDownCompute() methods. This is a matrix, one row per category (bucket) where each row contains the encoded output for that category. 
""" # ------------------------------------------------------------------------- # Do we need to build up our reverse mapping table? if self._topDownMappingM is None: # Each row represents an encoded output pattern self._topDownMappingM = SM32(self.ncategories, self.n) outputSpace = numpy.zeros(self.n, dtype=GetNTAReal()) for i in xrange(self.ncategories): self.encodeIntoArray(self.categories[i], outputSpace) self._topDownMappingM.setRowFromDense(i, outputSpace) return self._topDownMappingM def getBucketValues(self): """ See the function description in base.py """ return self.categories def getBucketInfo(self, buckets): """ See the function description in base.py """ if self.ncategories==0: return 0 topDownMappingM = self._getTopDownMapping() categoryIndex = buckets[0] category = self.categories[categoryIndex] encoding = topDownMappingM.getRow(categoryIndex) return [EncoderResult(value=category, scalar=categoryIndex, encoding=encoding)] def topDownCompute(self, encoded): """ See the function description in base.py """ if self.ncategories==0: return 0 topDownMappingM = self._getTopDownMapping() categoryIndex = topDownMappingM.rightVecProd(encoded).argmax() category = self.categories[categoryIndex] encoding = topDownMappingM.getRow(categoryIndex) return EncoderResult(value=category, scalar=categoryIndex, encoding=encoding) def closenessScores(self, expValues, actValues, fractional=True): """ See the function description in base.py kwargs will have the keyword "fractional", which is ignored by this encoder """ expValue = expValues[0] actValue = actValues[0] if expValue == actValue: closeness = 1.0 else: closeness = 0.0 if not fractional: closeness = 1.0 - closeness return numpy.array([closeness]) @classmethod def getSchema(cls): return SDRCategoryEncoderProto @classmethod def read(cls, proto): encoder = object.__new__(cls) encoder.n = proto.n encoder.w = proto.w encoder.random = NupicRandom() encoder.random.read(proto.random) encoder.verbosity = proto.verbosity encoder.name = proto.name encoder.description = [(proto.name, 0)] encoder.categories = list(proto.categories) encoder.sdrs = numpy.array(proto.sdrs, dtype=numpy.uint8) encoder.categoryToIndex = {category:index for index, category in enumerate(encoder.categories)} encoder.ncategories = len(encoder.categories) encoder._learningEnabled = proto.learningEnabled encoder._initOverlap() encoder._topDownMappingM = None encoder._topDownValues = None encoder.encoders = None return encoder def write(self, proto): proto.n = self.n proto.w = self.w self.random.write(proto.random) proto.verbosity = self.verbosity proto.name = self.name proto.categories = self.categories proto.sdrs = self.sdrs.tolist() proto.learningEnabled = self._learningEnabled
class RandomDistributedScalarEncoder(Encoder): """ A scalar encoder encodes a numeric (floating point) value into an array of bits. This class maps a scalar value into a random distributed representation that is suitable as scalar input into the spatial pooler. The encoding scheme is designed to replace a simple ScalarEncoder. It preserves the important properties around overlapping representations. Unlike ScalarEncoder, the min and max range can be dynamically increased without any negative effects. The only required parameter is resolution, which determines the resolution of input values. Scalar values are mapped to a bucket. The class maintains a random distributed encoding for each bucket. The following properties are maintained by RandomDistributedScalarEncoder: 1) Similar scalars should have high overlap. Overlap should decrease smoothly as scalars become less similar. Specifically, neighboring bucket indices must overlap by a linearly decreasing number of bits. 2) Dissimilar scalars should have very low overlap so that the SP does not confuse representations. Specifically, buckets that are more than w indices apart should have at most maxOverlap bits of overlap. We arbitrarily (and safely) define "very low" to be 2 bits of overlap or lower. Properties 1 and 2 lead to the following overlap rules for buckets i and j: If abs(i-j) < w then: overlap(i,j) = w - abs(i-j) else: overlap(i,j) <= maxOverlap 3) The representation for a scalar must not change during the lifetime of the object. Specifically, as new buckets are created and the min/max range is extended, the representation for previously in-range scalars and previously created buckets must not change. A short usage sketch follows the class definition below. """ def __init__(self, resolution, w=21, n=400, name=None, offset=None, seed=42, verbosity=0): """Constructor @param resolution A floating point positive number denoting the resolution of the output representation. Numbers within [offset-resolution/2, offset+resolution/2] will fall into the same bucket and thus have an identical representation. Adjacent buckets will differ in one bit. resolution is a required parameter. @param w Number of bits to set in output. w must be odd to avoid centering problems. w must be large enough that spatial pooler columns will have a sufficiently large overlap to avoid false matches. A value of w=21 is typical. @param n Number of bits in the representation (must be > w). n must be large enough such that there is enough room to select new representations as the range grows. With w=21 a value of n=400 is typical. The class enforces n > 6*w. @param name An optional string which will become part of the description. @param offset A floating point offset used to map scalar inputs to bucket indices. The middle bucket will correspond to numbers in the range [offset - resolution/2, offset + resolution/2). If set to None, the very first input that is encoded will be used to determine the offset. @param seed The seed used for the encoder's random number generator. If set to -1 the generator will be initialized without a fixed seed. @param verbosity An integer controlling the level of debugging output. A value of 0 implies no output. verbosity=1 may lead to one-time printouts during construction, serialization or deserialization. verbosity=2 may lead to some output per encode operation. verbosity>2 may lead to significantly more output. 
""" # Validate inputs if (w <= 0) or (w%2 == 0): raise ValueError("w must be an odd positive integer") if resolution <= 0: raise ValueError("resolution must be a positive number") if (n <= 6*w) or (not isinstance(n, int)): raise ValueError("n must be an int strictly greater than 6*w. For " "good results we recommend n be strictly greater " "than 11*w") self.encoders = None self.verbosity = verbosity self.w = w self.n = n self.resolution = float(resolution) # The largest overlap we allow for non-adjacent encodings self._maxOverlap = 2 # initialize the random number generators self._seed(seed) # Internal parameters for bucket mapping self.minIndex = None self.maxIndex = None self._offset = None self._initializeBucketMap(INITIAL_BUCKETS, offset) # A name used for debug printouts if name is not None: self.name = name else: self.name = "[%s]" % (self.resolution) if self.verbosity > 0: self.dump() def __setstate__(self, state): self.__dict__.update(state) # Initialize self.random as an instance of NupicRandom derived from the # previous numpy random state randomState = state["random"] if isinstance(randomState, numpy.random.mtrand.RandomState): self.random = NupicRandom(randomState.randint(sys.maxint)) def _seed(self, seed=-1): """ Initialize the random seed """ if seed != -1: self.random = NupicRandom(seed) else: self.random = NupicRandom() def getDecoderOutputFieldTypes(self): """ See method description in base.py """ return (FieldMetaType.float, ) def getWidth(self): """ See method description in base.py """ return self.n def getDescription(self): return [(self.name, 0)] def getBucketIndices(self, x): """ See method description in base.py """ if ((isinstance(x, float) and math.isnan(x)) or x == SENTINEL_VALUE_FOR_MISSING_DATA): return [None] if self._offset is None: self._offset = x bucketIdx = ( (self._maxBuckets/2) + int(round((x - self._offset) / self.resolution)) ) if bucketIdx < 0: bucketIdx = 0 elif bucketIdx >= self._maxBuckets: bucketIdx = self._maxBuckets-1 return [bucketIdx] def mapBucketIndexToNonZeroBits(self, index): """ Given a bucket index, return the list of non-zero bits. If the bucket index does not exist, it is created. If the index falls outside our range we clip it. @param index The bucket index to get non-zero bits for. @returns numpy array of indices of non-zero bits for specified index. """ if index < 0: index = 0 if index >= self._maxBuckets: index = self._maxBuckets-1 if not self.bucketMap.has_key(index): if self.verbosity >= 2: print "Adding additional buckets to handle index=", index self._createBucket(index) return self.bucketMap[index] def encodeIntoArray(self, x, output): """ See method description in base.py """ if x is not None and not isinstance(x, numbers.Number): raise TypeError( "Expected a scalar input but got input of type %s" % type(x)) # Get the bucket index to use bucketIdx = self.getBucketIndices(x)[0] # None is returned for missing value in which case we return all 0's. output[0:self.n] = 0 if bucketIdx is not None: output[self.mapBucketIndexToNonZeroBits(bucketIdx)] = 1 def _createBucket(self, index): """ Create the given bucket index. Recursively create as many in-between bucket indices as necessary. 
""" if index < self.minIndex: if index == self.minIndex - 1: # Create a new representation that has exactly w-1 overlapping bits # as the min representation self.bucketMap[index] = self._newRepresentation(self.minIndex, index) self.minIndex = index else: # Recursively create all the indices above and then this index self._createBucket(index+1) self._createBucket(index) else: if index == self.maxIndex + 1: # Create a new representation that has exactly w-1 overlapping bits # as the max representation self.bucketMap[index] = self._newRepresentation(self.maxIndex, index) self.maxIndex = index else: # Recursively create all the indices below and then this index self._createBucket(index-1) self._createBucket(index) def _newRepresentation(self, index, newIndex): """ Return a new representation for newIndex that overlaps with the representation at index by exactly w-1 bits """ newRepresentation = self.bucketMap[index].copy() # Choose the bit we will replace in this representation. We need to shift # this bit deterministically. If this is always chosen randomly then there # is a 1 in w chance of the same bit being replaced in neighboring # representations, which is fairly high ri = newIndex % self.w # Now we choose a bit such that the overlap rules are satisfied. newBit = self.random.getUInt32(self.n) newRepresentation[ri] = newBit while newBit in self.bucketMap[index] or \ not self._newRepresentationOK(newRepresentation, newIndex): self.numTries += 1 newBit = self.random.getUInt32(self.n) newRepresentation[ri] = newBit return newRepresentation def _newRepresentationOK(self, newRep, newIndex): """ Return True if this new candidate representation satisfies all our overlap rules. Since we know that neighboring representations differ by at most one bit, we compute running overlaps. """ if newRep.size != self.w: return False if (newIndex < self.minIndex-1) or (newIndex > self.maxIndex+1): raise ValueError("newIndex must be within one of existing indices") # A binary representation of newRep. 
We will use this to test containment newRepBinary = numpy.array([False]*self.n) newRepBinary[newRep] = True # Midpoint midIdx = self._maxBuckets/2 # Start by checking the overlap at minIndex runningOverlap = self._countOverlap(self.bucketMap[self.minIndex], newRep) if not self._overlapOK(self.minIndex, newIndex, overlap=runningOverlap): return False # Compute running overlaps all the way to the midpoint for i in range(self.minIndex+1, midIdx+1): # This is the bit that is going to change newBit = (i-1)%self.w # Update our running overlap if newRepBinary[self.bucketMap[i-1][newBit]]: runningOverlap -= 1 if newRepBinary[self.bucketMap[i][newBit]]: runningOverlap += 1 # Verify our rules if not self._overlapOK(i, newIndex, overlap=runningOverlap): return False # At this point, runningOverlap contains the overlap for midIdx # Compute running overlaps all the way to maxIndex for i in range(midIdx+1, self.maxIndex+1): # This is the bit that is going to change newBit = i%self.w # Update our running overlap if newRepBinary[self.bucketMap[i-1][newBit]]: runningOverlap -= 1 if newRepBinary[self.bucketMap[i][newBit]]: runningOverlap += 1 # Verify our rules if not self._overlapOK(i, newIndex, overlap=runningOverlap): return False return True def _countOverlapIndices(self, i, j): """ Return the overlap between bucket indices i and j """ if self.bucketMap.has_key(i) and self.bucketMap.has_key(j): iRep = self.bucketMap[i] jRep = self.bucketMap[j] return self._countOverlap(iRep, jRep) else: raise ValueError("Either i or j don't exist") @staticmethod def _countOverlap(rep1, rep2): """ Return the overlap between two representations. rep1 and rep2 are lists of non-zero indices. """ overlap = 0 for e in rep1: if e in rep2: overlap += 1 return overlap def _overlapOK(self, i, j, overlap=None): """ Return True if the given overlap between bucket indices i and j are acceptable. If overlap is not specified, calculate it from the bucketMap """ if overlap is None: overlap = self._countOverlapIndices(i, j) if abs(i-j) < self.w: if overlap == (self.w - abs(i-j)): return True else: return False else: if overlap <= self._maxOverlap: return True else: return False def _initializeBucketMap(self, maxBuckets, offset): """ Initialize the bucket map assuming the given number of maxBuckets. """ # The first bucket index will be _maxBuckets / 2 and bucket indices will be # allowed to grow lower or higher as long as they don't become negative. # _maxBuckets is required because the current CLA Classifier assumes bucket # indices must be non-negative. This normally does not need to be changed # but if altered, should be set to an even number. self._maxBuckets = maxBuckets self.minIndex = self._maxBuckets / 2 self.maxIndex = self._maxBuckets / 2 # The scalar offset used to map scalar values to bucket indices. The middle # bucket will correspond to numbers in the range # [offset-resolution/2, offset+resolution/2). 
# The bucket index for a number x will be: # maxBuckets/2 + int( round( (x-offset)/resolution ) ) self._offset = offset # This dictionary maps a bucket index into its bit representation # We initialize the class with a single bucket with index 0 self.bucketMap = {} def _permutation(n): r = numpy.arange(n, dtype=numpy.uint32) self.random.shuffle(r) return r self.bucketMap[self.minIndex] = _permutation(self.n)[0:self.w] # How often we need to retry when generating valid encodings self.numTries = 0 def dump(self): print "RandomDistributedScalarEncoder:" print " minIndex: %d" % self.minIndex print " maxIndex: %d" % self.maxIndex print " w: %d" % self.w print " n: %d" % self.getWidth() print " resolution: %g" % self.resolution print " offset: %s" % str(self._offset) print " numTries: %d" % self.numTries print " name: %s" % self.name if self.verbosity > 2: print " All buckets: " pprint.pprint(self.bucketMap) @classmethod def read(cls, proto): encoder = object.__new__(cls) encoder.resolution = proto.resolution encoder.w = proto.w encoder.n = proto.n encoder.name = proto.name encoder._offset = proto.offset encoder.random = NupicRandom() encoder.random.read(proto.random) encoder.resolution = proto.resolution encoder.verbosity = proto.verbosity encoder.minIndex = proto.minIndex encoder.maxIndex = proto.maxIndex encoder.encoders = None encoder._maxBuckets = INITIAL_BUCKETS encoder.bucketMap = {x.key: numpy.array(x.value, dtype=numpy.uint32) for x in proto.bucketMap} return encoder def write(self, proto): proto.resolution = self.resolution proto.w = self.w proto.n = self.n proto.name = self.name proto.offset = self._offset self.random.write(proto.random) proto.verbosity = self.verbosity proto.minIndex = self.minIndex proto.maxIndex = self.maxIndex proto.bucketMap = [{"key": key, "value": value.tolist()} for key, value in self.bucketMap.items()]
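# Hypothetical end-to-end usage sketch of RandomDistributedScalarEncoder
# above; it assumes the module's numpy import, the nupic Random binding, and
# that INITIAL_BUCKETS (and hence _maxBuckets) is 1000, so the first encoded
# value lands in the middle bucket 500. Adjacent buckets share exactly w - 1
# bits; buckets w or more apart share at most _maxOverlap bits.
rdse = RandomDistributedScalarEncoder(resolution=1.0, w=21, n=400, seed=42)
a = numpy.zeros(rdse.getWidth(), dtype="uint8")
b = numpy.zeros(rdse.getWidth(), dtype="uint8")
c = numpy.zeros(rdse.getWidth(), dtype="uint8")
rdse.encodeIntoArray(10.0, a)        # the first value encoded fixes the offset
rdse.encodeIntoArray(11.0, b)        # one resolution step away
rdse.encodeIntoArray(40.0, c)        # 30 buckets away, i.e. more than w apart
print(rdse.getBucketIndices(10.0))   # [500] assuming _maxBuckets == 1000
print(rdse.getBucketIndices(11.0))   # [501]
print((a * b).sum())                 # 20: adjacent buckets overlap in w - 1 bits
print((a * c).sum())                 # at most 2 (_maxOverlap) for distant buckets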