def __init__(self, steps=(1,), alpha=0.001, actValueAlpha=0.3, verbosity=0,
              callsPerSerialize=CALLS_PER_SERIALIZE):
   self._sdrClassifier = SDRClassifier(steps, alpha, actValueAlpha, verbosity)
   self._sdrClassifierCpp = SDRClassifierCpp(steps, alpha, actValueAlpha,
                                               verbosity)
   self._calls = 0
   self._callsPerSerialize = callsPerSerialize
Example #2
def sdrClassifierExample():
    # http://nupic.docs.numenta.org/stable/api/algorithms/classifiers.html
    """steps - Sequence of the different steps of multi-step predictions
    to learn
    alpha - learning rate (larger -> faster learning)
    actValueAlpha - Used to track the actual value within each bucket.
    A lower actValueAlpha results in longer term memory"""
    c = SDRClassifier(steps=[1], alpha=0.1, actValueAlpha=0.1, verbosity=0)

    # learning
    c.compute(recordNum=0,
              patternNZ=[1, 5, 9],
              classification={
                  "bucketIdx": 4,
                  "actValue": 34.7
              },
              learn=True,
              infer=False)

    # inference
    result = c.compute(recordNum=1,
                       patternNZ=[1, 5, 9],
                       classification={
                           "bucketIdx": 4,
                           "actValue": 34.7
                       },
                       learn=False,
                       infer=True)

    # Print the top three predictions for 1 step out.
    topPredictions = sorted(zip(result[1], result["actualValues"]),
                            reverse=True)[:3]
    for prob, value in topPredictions:
        print("Prediction of {} has prob: {}.".format(value, prob * 100.0))
Example #4
 def testSoftMaxOverflow(self):
   """
   Test if the softmax normalization overflows
   """
   c = SDRClassifier([1], 1.0, 0.1, 0)
   weight = numpy.array([[sys.float_info.max_exp + 1]])
   res = c.inferSingleStep([0], weight)
   self.assertFalse(numpy.isnan(res), "SoftMax overflow")
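For context, softmax overflow is conventionally avoided by subtracting the maximum activation before exponentiating. The sketch below only illustrates that normalization; it is not NuPIC's internal implementation:

import sys
import numpy

def stableSoftmax(activations):
    # Subtracting the max leaves the softmax unchanged mathematically but
    # keeps exp() within floating-point range.
    activations = numpy.asarray(activations, dtype=numpy.float64)
    exps = numpy.exp(activations - activations.max())
    return exps / exps.sum()

# Even an extreme activation no longer produces NaN or inf:
print(stableSoftmax([sys.float_info.max_exp + 1, 0.0]))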
Example #5
def initializeClassifiers(Nelements, encoder):
  claClassifier = CLAClassifier(steps=[0])

  sdrClassifier = SDRClassifier(steps=[0], alpha=0.1)

  patternNZ = list(numpy.where(encoder.encode(Nelements - 1))[0])
  classification = {'bucketIdx': Nelements - 1, 'actValue': Nelements - 1}

  # feed in the pattern with the highest bucket index
  claRetval = claClassifier.compute(0, patternNZ, classification,
                                    learn=True, infer=True)
  sdrRetval = sdrClassifier.compute(0, patternNZ, classification,
                                    learn=True, infer=True)
  return claClassifier, sdrClassifier
Example #6
class SDRClassifierDiff(object):
  """Classifier-like object that diffs the output from different classifiers.

  Instances of each version of the SDR classifier are created and each call to
  compute is passed to each version of the classifier. The results are diffed
  to make sure that there are no differences.

  Optionally, the classifiers can be serialized and deserialized after a
  specified number of calls to compute to ensure that serialization does not
  cause discrepancies between the results.

  TODO: Check internal state as well.
  TODO: Provide option to write output to a file.
  TODO: Provide an option to record differences without throwing an exception.
  """


  __VERSION__ = 'SDRClassifierDiffV1'


  def __init__(self, steps=(1,), alpha=0.001, actValueAlpha=0.3, verbosity=0,
               callsPerSerialize=CALLS_PER_SERIALIZE):
    self._sdrClassifier = SDRClassifier(steps, alpha, actValueAlpha, verbosity)
    self._sdrClassifierCpp = SDRClassifierCpp(steps, alpha, actValueAlpha,
                                                verbosity)
    self._calls = 0
    self._callsPerSerialize = callsPerSerialize


  def compute(self, recordNum, patternNZ, classification, learn, infer):
    result1 = self._sdrClassifier.compute(recordNum, patternNZ, classification,
                                          learn, infer)
    result2 = self._sdrClassifierCpp.compute(recordNum, patternNZ,
                                              classification, learn, infer)
    self._calls += 1
    # Check if it is time to serialize and deserialize.
    if self._calls % self._callsPerSerialize == 0:
      self._sdrClassifier = pickle.loads(pickle.dumps(self._sdrClassifier))
      self._sdrClassifierCpp = pickle.loads(pickle.dumps(
          self._sdrClassifierCpp))
    # Assert both results are the same type.
    assert type(result1) == type(result2)
    # Assert that the keys match.
    assert set(result1.keys()) == set(result2.keys()), "diff detected: " \
      "py result=%s, C++ result=%s" % (result1, result2)
    # Assert that the values match.
    for k, l in result1.iteritems():
      assert type(l) == type(result2[k])
      for i in xrange(len(l)):
        if isinstance(classification['actValue'], numbers.Real):
          assert abs(float(l[i]) - float(result2[k][i])) < 0.0000001, (
              'Python SDRClassifier has value %f and C++ SDRClassifierCpp has '
              'value %f.' % (l[i], result2[k][i]))
        else:
          assert l[i] == result2[k][i], (
              'Python SDRClassifier has value %s and C++ SDRClassifierCpp has '
              'value %s.' % (str(l[i]), str(result2[k][i])))
    return result1
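A minimal, hypothetical driver for the diff wrapper above (it assumes SDRClassifierDiff and its dependencies are importable from this module):

# Hypothetical usage: feed identical records to both implementations and let
# the asserts in compute() flag any divergence between Python and C++ results.
diffClassifier = SDRClassifierDiff(steps=(1,), alpha=0.1, actValueAlpha=0.1)
for recordNum in xrange(100):
  diffClassifier.compute(recordNum=recordNum,
                         patternNZ=[1, 5, 9],
                         classification={'bucketIdx': 4, 'actValue': 34.7},
                         learn=True, infer=True)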
Example #7
 def read(proto):
     """
     proto: SDRClassifierRegionProto capnproto object
     """
     impl = proto.classifierImp
     if impl == "py":
         return SDRClassifier.read(proto.claClassifier)
     else:
         raise ValueError("Invalid classifier implementation (%r). Value must be "
                          '"py".' % impl)
Example #8
 def read(proto):
   """
   proto: SDRClassifierRegionProto capnproto object
   """
   impl = proto.implementation
   if impl == 'py':
     return SDRClassifier.read(proto.sdrClassifier)
   else:
     raise ValueError('Invalid classifier implementation (%r). Value must be '
                      '"py".' % impl)
Example #9
 def read(proto):
     """
     proto: SDRClassifierRegionProto capnproto object
     """
     impl = proto.classifierImp
     if impl == 'py':
         return SDRClassifier.read(proto.claClassifier)
     else:
         raise ValueError(
             'Invalid classifier implementation (%r). Value must be '
             '"py".' % impl)
Example #10
 def read(proto):
     """
     proto: SDRClassifierRegionProto capnproto object
     """
     impl = proto.implementation
     if impl == "py":
         return SDRClassifier.read(proto.sdrClassifier)
     elif impl == "cpp":
         return FastSDRClassifier.read(proto.sdrClassifier)
     else:
         raise ValueError("Invalid classifier implementation (%r). Value must be "
                          '"py" or "cpp".' % impl)
Example #11
  def testWriteRead(self):
    c1 = SDRClassifier([0], 0.1, 0.1, 0)

    # Create a vector of input bit indices
    input1 = [1, 5, 9]
    result = c1.compute(recordNum=0,
                        patternNZ=input1,
                        classification={'bucketIdx': 4, 'actValue': 34.7},
                        learn=True, infer=True)

    proto1 = SdrClassifier_capnp.SdrClassifierProto.new_message()
    c1.write(proto1)

    # Write the proto to a temp file and read it back into a new proto
    with tempfile.TemporaryFile() as f:
      proto1.write(f)
      f.seek(0)
      proto2 = SdrClassifier_capnp.SdrClassifierProto.read(f)

    # Load the deserialized proto
    c2 = SDRClassifier.read(proto2)

    self.assertEqual(c1.steps, c2.steps)
    self.assertAlmostEqual(c1.alpha, c2.alpha)
    self.assertAlmostEqual(c1.actValueAlpha, c2.actValueAlpha)
    self.assertEqual(c1._learnIteration, c2._learnIteration)
    self.assertEqual(c1._recordNumMinusLearnIteration, c2._recordNumMinusLearnIteration)
    self.assertEqual(c1._patternNZHistory, c2._patternNZHistory)
    self.assertEqual(c1._weightMatrix.keys(), c2._weightMatrix.keys())
    for step in c1._weightMatrix.keys():
      c1Weight = c1._weightMatrix[step]
      c2Weight = c2._weightMatrix[step]
      self.assertSequenceEqual(list(c1Weight.flatten()),
                               list(c2Weight.flatten()))
    self.assertEqual(c1._maxBucketIdx, c2._maxBucketIdx)
    self.assertEqual(c1._maxInputIdx, c2._maxInputIdx)
    self.assertEqual(len(c1._actualValues), len(c2._actualValues))
    for i in xrange(len(c1._actualValues)):
      self.assertAlmostEqual(c1._actualValues[i], c2._actualValues[i], 5)
    self.assertEqual(c1._version, c2._version)
    self.assertEqual(c1.verbosity, c2.verbosity)

    result1 = c1.compute(recordNum=1,
                         patternNZ=input1,
                         classification={'bucketIdx': 4, 'actValue': 34.7},
                         learn=True, infer=True)
    result2 = c2.compute(recordNum=1,
                         patternNZ=input1,
                         classification={'bucketIdx': 4, 'actValue': 34.7},
                         learn=True, infer=True)

    self.assertEqual(result1.keys(), result2.keys())
    for key in result1.keys():
      for i in xrange(len(c1._actualValues)):
        self.assertAlmostEqual(result1[key][i], result2[key][i], 5)
Example #12
 def read(proto):
     """
 :param proto: SDRClassifierRegionProto capnproto object
 """
     impl = proto.implementation
     if impl == 'py':
         return SDRClassifier.read(proto.sdrClassifier)
     elif impl == 'cpp':
         return FastSDRClassifier.read(proto.sdrClassifier)
     else:
         raise ValueError(
             'Invalid classifier implementation (%r). Value must be '
             '"py" or "cpp".' % impl)
Example #13
 def read(proto):
   """
   :param proto: SDRClassifierRegionProto capnproto object
   """
   impl = proto.implementation
   if impl == 'py':
     return SDRClassifier.read(proto.sdrClassifier)
   elif impl == 'cpp':
     return FastSDRClassifier.read(proto.sdrClassifier)
   elif impl == 'diff':
     return SDRClassifierDiff.read(proto.sdrClassifier)
   else:
     raise ValueError('Invalid classifier implementation (%r). Value must be '
                      '"py", "cpp" or "diff".' % impl)
Example #14
    def create(*args, **kwargs):
        """
    Create a SDR classifier factory.
    The implementation of the SDR Classifier can be specified with
    the "implementation" keyword argument.

    The SDRClassifierFactory uses the implementation as specified in
     src/nupic/support/nupic-default.xml
    """
        impl = kwargs.pop('implementation', None)
        if impl is None:
            impl = Configuration.get('nupic.opf.sdrClassifier.implementation')
        if impl == 'py':
            return SDRClassifier(*args, **kwargs)
        else:
            raise ValueError(
                'Invalid classifier implementation (%r). Value must be '
                '"py".' % impl)
Example #15
    def __init__(self, model_params):
        # Init an HTM network
        self.network = Network()

        # Getting parameters for network regions
        self.sensor_params = model_params['Sensor']
        self.spatial_pooler_params = model_params['SpatialPooler']
        self.temporal_memory_params = model_params['TemporalMemory']
        self.classifiers_params = model_params['Classifiers']
        self.encoders_params = model_params['Encoders']

        # Adding regions to HTM network
        self.network.addRegion('DurationEncoder', 'ScalarSensor',
                               json.dumps(self.encoders_params['duration']))
        self.network.addRegion('VelocityEncoder', 'ScalarSensor',
                               json.dumps(self.encoders_params['pitch']))
        self.network.addRegion('PitchEncoder', 'ScalarSensor',
                               json.dumps(self.encoders_params['velocity']))

        self.network.addRegion('SpatialPooler', 'py.SPRegion',
                               json.dumps(self.spatial_pooler_params))
        self.network.addRegion('TemporalMemory', 'py.TMRegion',
                               json.dumps(self.temporal_memory_params))

        # Creating outer classifiers for multifield prediction
        dclp = self.classifiers_params['duration']
        vclp = self.classifiers_params['pitch']
        pclp = self.classifiers_params['velocity']

        self.duration_classifier = SDRClassifier(
            steps=(1, ),
            verbosity=dclp['verbosity'],
            alpha=dclp['alpha'],
            actValueAlpha=dclp['actValueAlpha'])
        self.velocity_classifier = SDRClassifier(
            steps=(1, ),
            verbosity=vclp['verbosity'],
            alpha=vclp['alpha'],
            actValueAlpha=vclp['actValueAlpha'])
        self.pitch_classifier = SDRClassifier(
            steps=(1, ),
            verbosity=pclp['verbosity'],
            alpha=pclp['alpha'],
            actValueAlpha=pclp['actValueAlpha'])

        self._link_all_regions()
        self._enable_learning()
        self._enable_inference()

        self.network.initialize()
Example #16
    def create(*args, **kwargs):
        """
    Create a SDR classifier factory.
    The implementation of the SDR Classifier can be specified with
    the "implementation" keyword argument.

    The SDRClassifierFactory uses the implementation as specified in
     `Default NuPIC Configuration <default-config.html>`_.
    """
        impl = kwargs.pop('implementation', None)
        if impl is None:
            impl = Configuration.get('nupic.opf.sdrClassifier.implementation')
        if impl == 'py':
            return SDRClassifier(*args, **kwargs)
        elif impl == 'cpp':
            return FastSDRClassifier(*args, **kwargs)
        else:
            raise ValueError(
                'Invalid classifier implementation (%r). Value must be '
                '"py" or "cpp".' % impl)
Example #17
def main():
    # cluster similar inputs together in SDR space
    s = SpatialPooler()
    print(type(s))

    # powerful sequence memory in SDR space
    t = TemporalMemory()
    print(type(t))

    # computes rolling Gaussian based on raw anomaly scores and then their
    # likelihood
    a = AnomalyLikelihood()
    print(type(a))

    # temporally groups active cell sets from TM
    u = UnionTemporalPooler()
    print(type(u))

    # learns pairings of Union representations and labeled classes
    c = SDRClassifier()
    print(type(c))
Example #18
    def compute(self, recordNum, patternNZ, classification, learn, infer):
        result1 = self._sdrClassifier.compute(recordNum, patternNZ,
                                              classification, learn, infer)
        result2 = self._sdrClassifierCpp.compute(recordNum, patternNZ,
                                                 classification, learn, infer)
        self._calls += 1
        # Check if it is time to serialize and deserialize.
        if self._calls % self._callsPerSerialize == 0:
            schemaPy = self._sdrClassifier.getSchema()
            protoPy = schemaPy.new_message()
            self._sdrClassifier.write(protoPy)
            protoPy = schemaPy.from_bytes(protoPy.to_bytes())
            self._sdrClassifier = SDRClassifier.read(protoPy)

            schemaCpp = self._sdrClassifierCpp.getSchema()
            protoCpp = schemaCpp.new_message()
            self._sdrClassifierCpp.write(protoCpp)
            protoCpp = schemaCpp.from_bytes(protoCpp.to_bytes())
            self._sdrClassifierCpp = SDRClassifierCpp.read(protoCpp)

        # Assert both results are the same type.
        assert type(result1) == type(result2)
        # Assert that the keys match.
        assert set(result1.keys()) == set(result2.keys()), "diff detected: " \
          "py result=%s, C++ result=%s" % (result1, result2)
        # Assert that the values match.
        for k, l in result1.items():
            assert type(l) == type(result2[k])
            for i in range(len(l)):
                if isinstance(classification['actValue'], numbers.Real):
                    assert abs(
                        float(l[i]) - float(result2[k][i])
                    ) < 0.0000001, (
                        'Python SDRClassifier has value %f and C++ SDRClassifierCpp has '
                        'value %f.' % (l[i], result2[k][i]))
                else:
                    assert l[i] == result2[k][i], (
                        'Python SDRClassifier has value %s and C++ SDRClassifierCpp has '
                        'value %s.' % (str(l[i]), str(result2[k][i])))
        return result1
Example #19
  def compute(self, recordNum, patternNZ, classification, learn, infer):
    result1 = self._sdrClassifier.compute(recordNum, patternNZ, classification,
                                          learn, infer)
    result2 = self._sdrClassifierCpp.compute(recordNum, patternNZ,
                                              classification, learn, infer)
    self._calls += 1
    # Check if it is time to serialize and deserialize.
    if self._calls % self._callsPerSerialize == 0:
      schemaPy = self._sdrClassifier.getSchema()
      protoPy = schemaPy.new_message()
      self._sdrClassifier.write(protoPy)
      protoPy = schemaPy.from_bytes(protoPy.to_bytes())
      self._sdrClassifier = SDRClassifier.read(protoPy)

      schemaCpp = self._sdrClassifierCpp.getSchema()
      protoCpp = schemaCpp.new_message()
      self._sdrClassifierCpp.write(protoCpp)
      protoCpp = schemaCpp.from_bytes(protoCpp.to_bytes())
      self._sdrClassifierCpp = SDRClassifierCpp.read(protoCpp)

    # Assert both results are the same type.
    assert type(result1) == type(result2)
    # Assert that the keys match.
    assert set(result1.keys()) == set(result2.keys()), "diff detected: " \
      "py result=%s, C++ result=%s" % (result1, result2)
    # Assert that the values match.
    for k, l in result1.iteritems():
      assert type(l) == type(result2[k])
      for i in xrange(len(l)):
        if isinstance(classification['actValue'], numbers.Real):
          assert abs(float(l[i]) - float(result2[k][i])) < 0.0000001, (
              'Python SDRClassifier has value %f and C++ SDRClassifierCpp has '
              'value %f.' % (l[i], result2[k][i]))
        else:
          assert l[i] == result2[k][i], (
              'Python SDRClassifier has value %s and C++ SDRClassifierCpp has '
              'value %s.' % (str(l[i]), str(result2[k][i])))
    return result1
Example #20
import numpy as np
from tqdm import tqdm

from nupic.encoders import ScalarEncoder
from nupic.algorithms.spatial_pooler import SpatialPooler
from nupic.algorithms.temporal_memory import TemporalMemory
from nupic.algorithms.sdr_classifier import SDRClassifier

N = 900
x = np.sin(np.arange(N) * 2 * np.pi / 30.0)
inputDimensions = (256, )
columnDimensions = (512, )

encoder = ScalarEncoder(21, -1.0, 1.0, n=inputDimensions[0])
sp = SpatialPooler(inputDimensions=inputDimensions,
                   columnDimensions=columnDimensions,
                   globalInhibition=True,
                   numActiveColumnsPerInhArea=21)
tm = TemporalMemory(columnDimensions=columnDimensions)
c = SDRClassifier(steps=[1], alpha=0.1, actValueAlpha=0.1, verbosity=0)

x_true = x[1:]
x_predict = np.zeros(len(x) - 1)

for i, xi in tqdm(enumerate(x[:-1])):
    encoded = encoder.encode(xi)
    bucketIdx = np.where(encoded > 0)[0][0]
    spd = np.zeros(columnDimensions[0])
    sp.compute(encoded, True, spd)
    active_indices = np.where(spd > 0)[0]
    tm.compute(active_indices)

    active_cell_indices = tm.getActiveCells()
    predictive_cell_indices = tm.getPredictiveCells()
    patternNZ = np.asarray(active_cell_indices)
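The listing is truncated here. A hedged sketch of how such a loop typically finishes, feeding the TM cells to the classifier and recording the 1-step prediction (variable names are taken from the snippet above; the original continuation is not shown):

    # Hedged continuation, not part of the original listing: feed the active
    # cells to the classifier and record the most likely 1-step-ahead value.
    result = c.compute(recordNum=i,
                       patternNZ=patternNZ,
                       classification={"bucketIdx": bucketIdx, "actValue": xi},
                       learn=True,
                       infer=True)
    x_predict[i] = result["actualValues"][np.argmax(result[1])]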
Example #21
def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-t',
        '--time',
        type=float,
        default=1,
        help='Number of times to run through the training data.')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args(args=argv)

    # Load data.
    train_labels, train_images, test_labels, test_images = load_mnist()

    if False:
        # Experiment to verify that input dimensions are handled correctly. If
        # you enable this, don't forget to rescale the radii as well as the
        # input.
        from scipy.ndimage import zoom
        new_sz = (1, 4, 1)
        train_images = [zoom(im, new_sz, order=0) for im in train_images]
        test_images = [zoom(im, new_sz, order=0) for im in test_images]

    training_data = list(zip(train_images, train_labels))
    test_data = list(zip(test_images, test_labels))
    random.shuffle(training_data)
    random.shuffle(test_data)
    if args.debug and args.time < 1:
        test_data = test_data[:int(len(test_data) * args.time)]
    # Setup spatial pooler machine.
    enc = BWImageEncoder(train_images[0].shape[:2])
    sp = SpatialPooler(input_sdr=enc.output, segments=1, **parameters)
    sdrc = SDRClassifier(steps=[0])

    if verbose:
        print(sp.statistics())

    # Training Loop
    train_cycles = len(train_images) * args.time
    if verbose:
        print("Training for %d cycles" % train_cycles)
    for i in range(int(round(train_cycles))):
        sp.reset()
        img, lbl = random.choice(training_data)
        img = synthesize(img, diag=False)
        enc.encode(np.squeeze(img))
        sp.compute()
        sdrc.compute(i,
                     sp.columns.flat_index,
                     classification={
                         "bucketIdx": lbl,
                         "actValue": lbl
                     },
                     learn=True,
                     infer=False)

    if verbose:
        print("Done training.")
        print("")
        print("Removing zero permanence synapses.")
        sp.synapses.remove_zero_permanence_synapses()
        print(sp.statistics())

    # Testing Loop
    if verbose:
        print("Testing for %d cycles." % len(test_data))
    score = 0
    for img, lbl in test_data:
        enc.encode(np.squeeze(img))
        sp.compute(learn=False)
        try:
            inference = sdrc.infer(sp.columns.flat_index, None)[0]
        except IndexError:
            inference = np.zeros(10)
        if lbl == np.argmax(inference):
            score += 1

    print('Score:', 100 * score / len(test_data), '%')

    if synapses_debug:
        sp.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    return score / len(test_data)
Example #22
class HTMusicModel(object):
    def __init__(self, model_params):
        # Init an HTM network
        self.network = Network()

        # Getting parameters for network regions
        self.sensor_params = model_params['Sensor']
        self.spatial_pooler_params = model_params['SpatialPooler']
        self.temporal_memory_params = model_params['TemporalMemory']
        self.classifiers_params = model_params['Classifiers']
        self.encoders_params = model_params['Encoders']

        # Adding regions to HTM network
        self.network.addRegion('DurationEncoder', 'ScalarSensor',
                               json.dumps(self.encoders_params['duration']))
        self.network.addRegion('VelocityEncoder', 'ScalarSensor',
                               json.dumps(self.encoders_params['pitch']))
        self.network.addRegion('PitchEncoder', 'ScalarSensor',
                               json.dumps(self.encoders_params['velocity']))

        self.network.addRegion('SpatialPooler', 'py.SPRegion',
                               json.dumps(self.spatial_pooler_params))
        self.network.addRegion('TemporalMemory', 'py.TMRegion',
                               json.dumps(self.temporal_memory_params))

        # Creating outer classifiers for multifield prediction
        dclp = self.classifiers_params['duration']
        vclp = self.classifiers_params['pitch']
        pclp = self.classifiers_params['velocity']

        self.duration_classifier = SDRClassifier(
            steps=(1, ),
            verbosity=dclp['verbosity'],
            alpha=dclp['alpha'],
            actValueAlpha=dclp['actValueAlpha'])
        self.velocity_classifier = SDRClassifier(
            steps=(1, ),
            verbosity=vclp['verbosity'],
            alpha=vclp['alpha'],
            actValueAlpha=vclp['actValueAlpha'])
        self.pitch_classifier = SDRClassifier(
            steps=(1, ),
            verbosity=pclp['verbosity'],
            alpha=pclp['alpha'],
            actValueAlpha=pclp['actValueAlpha'])

        self._link_all_regions()
        self._enable_learning()
        self._enable_inference()

        self.network.initialize()

    def _link_all_regions(self):
        # Linking regions
        self.network.link('DurationEncoder', 'SpatialPooler', 'UniformLink',
                          '')
        self.network.link('VelocityEncoder', 'SpatialPooler', 'UniformLink',
                          '')
        self.network.link('PitchEncoder', 'SpatialPooler', 'UniformLink', '')
        self.network.link('SpatialPooler',
                          'TemporalMemory',
                          'UniformLink',
                          '',
                          srcOutput='bottomUpOut',
                          destInput='bottomUpIn')

    def _enable_learning(self):
        # Enable learning for all regions.
        self.network.regions["SpatialPooler"].setParameter("learningMode", 1)
        self.network.regions["TemporalMemory"].setParameter("learningMode", 1)

    def _enable_inference(self):
        # Enable inference for all regions.
        self.network.regions["SpatialPooler"].setParameter("inferenceMode", 1)
        self.network.regions["TemporalMemory"].setParameter("inferenceMode", 1)

    def train(self, duration, pitch, velocity):
        records_total = self.network.regions['SpatialPooler'].getSelf(
        ).getAlgorithmInstance().getIterationNum()

        self.network.regions['DurationEncoder'].setParameter(
            'sensedValue', duration)
        self.network.regions['PitchEncoder'].setParameter('sensedValue', pitch)
        self.network.regions['VelocityEncoder'].setParameter(
            'sensedValue', velocity)
        self.network.run(1)

        # Getting active cells of TM and bucket indices of encoders to feed classifiers
        active_cells = numpy.array(
            self.network.regions['TemporalMemory'].getOutputData(
                'bottomUpOut')).nonzero()[0]
        duration_bucket = numpy.array(
            self.network.regions['DurationEncoder'].getOutputData('bucket'))
        pitch_bucket = numpy.array(
            self.network.regions['PitchEncoder'].getOutputData('bucket'))
        velocity_bucket = numpy.array(
            self.network.regions['VelocityEncoder'].getOutputData('bucket'))

        duration_classifier_result = self.duration_classifier.compute(
            recordNum=records_total,
            patternNZ=active_cells,
            classification={
                'bucketIdx': duration_bucket[0],
                'actValue': duration
            },
            learn=True,
            infer=False)

        pitch_classifier_result = self.pitch_classifier.compute(
            recordNum=records_total,
            patternNZ=active_cells,
            classification={
                'bucketIdx': pitch_bucket[0],
                'actValue': pitch
            },
            learn=True,
            infer=False)

        velocity_classifier_result = self.velocity_classifier.compute(
            recordNum=records_total,
            patternNZ=active_cells,
            classification={
                'bucketIdx': velocity_bucket[0],
                'actValue': velocity
            },
            learn=True,
            infer=False)

    def generate(self, seed, output_dir, event_amount):
        records_total = self.network.regions['SpatialPooler'].getSelf(
        ).getAlgorithmInstance().getIterationNum()

        seed = seed

        midi = pretty_midi.PrettyMIDI()
        midi_program = pretty_midi.instrument_name_to_program(
            'Acoustic Grand Piano')
        piano = pretty_midi.Instrument(program=midi_program)
        clock = 0
        for iters in tqdm(range(records_total, records_total + event_amount)):
            duration = seed[0]
            pitch = seed[1]
            velocity = seed[2]

            self.network.regions['DurationEncoder'].setParameter(
                'sensedValue', duration)
            self.network.regions['PitchEncoder'].setParameter(
                'sensedValue', pitch)
            self.network.regions['VelocityEncoder'].setParameter(
                'sensedValue', velocity)
            self.network.run(1)

            # Getting active cells of TM and bucket indices of encoders to feed classifiers
            active_cells = numpy.array(
                self.network.regions['TemporalMemory'].getOutputData(
                    'bottomUpOut')).nonzero()[0]

            duration_bucket = numpy.array(
                self.network.regions['DurationEncoder'].getOutputData(
                    'bucket'))

            pitch_bucket = numpy.array(
                self.network.regions['PitchEncoder'].getOutputData('bucket'))

            velocity_bucket = numpy.array(
                self.network.regions['VelocityEncoder'].getOutputData(
                    'bucket'))

            # Getting the classifier results

            duration_classifier_result = self.duration_classifier.compute(
                recordNum=records_total,
                patternNZ=active_cells,
                classification={
                    'bucketIdx': duration_bucket[0],
                    'actValue': duration
                },
                learn=False,
                infer=True)

            pitch_classifier_result = self.pitch_classifier.compute(
                recordNum=records_total,
                patternNZ=active_cells,
                classification={
                    'bucketIdx': pitch_bucket[0],
                    'actValue': pitch
                },
                learn=False,
                infer=True)

            velocity_classifier_result = self.velocity_classifier.compute(
                recordNum=records_total,
                patternNZ=active_cells,
                classification={
                    'bucketIdx': velocity_bucket[0],
                    'actValue': velocity
                },
                learn=False,
                infer=True)

            du = duration_classifier_result[1].argmax()
            pi = pitch_classifier_result[1].argmax()
            ve = velocity_classifier_result[1].argmax()

            duration_top_probs = duration_classifier_result[1][
                0:2] / duration_classifier_result[1][0:2].sum()

            predicted_duration = duration_classifier_result['actualValues'][du]

            # predicted_duration = duration_classifier_result['actualValues'][du]
            predicted_pitch = pitch_classifier_result['actualValues'][pi]
            predicted_velocity = velocity_classifier_result['actualValues'][ve]

            # print duration_classifier_result

            note = pretty_midi.Note(velocity=int(predicted_velocity),
                                    pitch=int(predicted_pitch),
                                    start=float(clock),
                                    end=float(clock + predicted_duration))

            piano.notes.append(note)

            clock = clock + 0.25

            seed[0] = predicted_duration
            seed[1] = predicted_pitch
            seed[2] = predicted_velocity

        midi.instruments.append(piano)
        midi.remove_invalid_notes()
        time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        midi.write(output_dir + time + '.mid')

    def load_model(self, load_path):

        # Loading SpatialPooler
        print 'Loading SpatialPooler'
        with open(load_path + 'sp.bin', 'rb') as sp:
            sp_builder = SpatialPoolerProto.read(
                sp, traversal_limit_in_words=2**61)
        self.network.regions['SpatialPooler'].getSelf(
        )._sfdr = self.network.regions['SpatialPooler'].getSelf()._sfdr.read(
            sp_builder)

        # Loading TemporalMemory
        print 'Loading TemporalMemory'
        self.network.regions['TemporalMemory'].getSelf().getAlgorithmInstance(
        ).loadFromFile(load_path + 'tm.bin')

        # Loading end classifier
        print 'Loading duration classifier'
        with open(load_path + 'dcl.bin', 'rb') as dcl:
            dcl_builder = SdrClassifierProto.read(
                dcl, traversal_limit_in_words=2**61)
        self.duration_classifier = self.duration_classifier.read(dcl_builder)

        # Loading pitch classifier
        print 'Loading pitch classifier'
        with open(load_path + 'pcl.bin', 'rb') as pcl:
            pcl_builder = SdrClassifierProto.read(
                pcl, traversal_limit_in_words=2**61)
        self.pitch_classifier = self.pitch_classifier.read(pcl_builder)

        # Loading velocity classifier
        print 'Loading velocity classifier'
        with open(load_path + 'vcl.bin', 'rb') as vcl:
            vcl_builder = SdrClassifierProto.read(
                vcl, traversal_limit_in_words=2**61)
        self.velocity_classifier = self.velocity_classifier.read(vcl_builder)

    def save_model(self, save_path):

        # Saving SpatialPooler
        print 'Saving SpatialPooler'
        sp_builder = SpatialPoolerProto.new_message()
        self.network.regions['SpatialPooler'].getSelf().getAlgorithmInstance(
        ).write(sp_builder)
        with open(save_path + 'sp.bin', 'w+b') as sp:
            sp_builder.write(sp)

        # Saving TemporalMemory
        print 'Saving TemporalMemory'
        self.network.regions['TemporalMemory'].getSelf().getAlgorithmInstance(
        ).saveToFile(save_path + 'tm.bin')

        # Saving end classifier
        print 'Saving duration classifier'
        dcl_builder = SdrClassifierProto.new_message()
        self.duration_classifier.write(dcl_builder)
        with open(save_path + 'dcl.bin', 'w+b') as dcl:
            dcl_builder.write(dcl)

        # Saving pitch classifier
        print 'Saving pitch classifier'
        pcl_builder = SdrClassifierProto.new_message()
        self.pitch_classifier.write(pcl_builder)
        with open(save_path + 'pcl.bin', 'w+b') as pcl:
            pcl_builder.write(pcl)

        # Saving velocity classifier
        print 'Saving velocity classifier'
        vcl_builder = SdrClassifierProto.new_message()
        self.velocity_classifier.write(vcl_builder)
        with open(save_path + 'vcl.bin', 'w+b') as vcl:
            vcl_builder.write(vcl)
Example #23
class SDRClassifierDiff(object):
  """Classifier-like object that diffs the output from different classifiers.

  Instances of each version of the SDR classifier are created and each call to
  compute is passed to each version of the classifier. The results are diffed
  to make sure that there are no differences.

  Optionally, the classifiers can be serialized and deserialized after a
  specified number of calls to compute to ensure that serialization does not
  cause discrepancies between the results.

  TODO: Check internal state as well.
  TODO: Provide option to write output to a file.
  TODO: Provide an option to record differences without throwing an exception.
  """


  __VERSION__ = 'SDRClassifierDiffV1'


  def __init__(self, steps=(1,), alpha=0.001, actValueAlpha=0.3, verbosity=0,
               callsPerSerialize=CALLS_PER_SERIALIZE):
    self._sdrClassifier = SDRClassifier(steps, alpha, actValueAlpha, verbosity)
    self._sdrClassifierCpp = SDRClassifierCpp(steps, alpha, actValueAlpha,
                                                verbosity)
    self._calls = 0
    self._callsPerSerialize = callsPerSerialize


  def compute(self, recordNum, patternNZ, classification, learn, infer):
    result1 = self._sdrClassifier.compute(recordNum, patternNZ, classification,
                                          learn, infer)
    result2 = self._sdrClassifierCpp.compute(recordNum, patternNZ,
                                              classification, learn, infer)
    self._calls += 1
    # Check if it is time to serialize and deserialize.
    if self._calls % self._callsPerSerialize == 0:
      schemaPy = self._sdrClassifier.getSchema()
      protoPy = schemaPy.new_message()
      self._sdrClassifier.write(protoPy)
      protoPy = schemaPy.from_bytes(protoPy.to_bytes())
      self._sdrClassifier = SDRClassifier.read(protoPy)

      schemaCpp = self._sdrClassifierCpp.getSchema()
      protoCpp = schemaCpp.new_message()
      self._sdrClassifierCpp.write(protoCpp)
      protoCpp = schemaCpp.from_bytes(protoCpp.to_bytes())
      self._sdrClassifierCpp = SDRClassifierCpp.read(protoCpp)

    # Assert both results are the same type.
    assert type(result1) == type(result2)
    # Assert that the keys match.
    assert set(result1.keys()) == set(result2.keys()), "diff detected: " \
      "py result=%s, C++ result=%s" % (result1, result2)
    # Assert that the values match.
    for k, l in result1.iteritems():
      assert type(l) == type(result2[k])
      for i in xrange(len(l)):
        if isinstance(classification['actValue'], numbers.Real):
          assert abs(float(l[i]) - float(result2[k][i])) < 0.0000001, (
              'Python SDRClassifier has value %f and C++ SDRClassifierCpp has '
              'value %f.' % (l[i], result2[k][i]))
        else:
          assert l[i] == result2[k][i], (
              'Python SDRClassifier has value %s and C++ SDRClassifierCpp has '
              'value %s.' % (str(l[i]), str(result2[k][i])))
    return result1
Example #24
def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--episode_length',
        type=int,
        default=100,
    )
    parser.add_argument(
        '--train_episodes',
        type=int,
        default=100 * len(patterns),
    )
    parser.add_argument(
        '--test_episodes',
        type=int,
        default=20 * len(patterns),
    )
    parser.add_argument(
        '--environment_size',
        type=int,
        default=40,
    )
    parser.add_argument('--move_env', action='store_true')
    parser.add_argument('--show_pattern', action='store_true')
    args = parser.parse_args(args=argv)

    # PARAMETER OVERRIDES!
    parameters['grid_cells'] = default_parameters['grid_cells']

    if verbose:
        import pprint
        print("Parameters = ", end='')
        pprint.pprint(parameters)
        print("Episode Length", args.episode_length)

    env = Environment(size=args.environment_size)
    gc = GridCellEncoder(**parameters['grid_cells'])

    trajectory = TemporalMemory(column_sdr=gc.grid_cells,
                                context_sdr=None,
                                anomaly_alpha=1. / 1000,
                                predicted_boost=1,
                                segments_per_cell=20,
                                **parameters['trajectory'])
    trajectory_sdrc = SDRClassifier(steps=[0])

    motion = StableSpatialPooler(input_sdr=SDR(trajectory.active),
                                 **parameters['motion'])
    motion_sdrc = SDRClassifier(steps=[0])

    def reset():
        env.reset()
        gc.reset()
        trajectory.reset()
        motion.reset()

    env_offset = np.zeros(2)

    def compute(learn=True):
        gc_sdr = gc.encode(env.position + env_offset)

        trajectory.compute(
            column_sdr=gc_sdr,
            learn=learn,
        )

        motion.compute(
            input_sdr=trajectory.active,
            input_learning_sdr=trajectory.learning,
            learn=learn,
        )

    # Train
    if verbose:
        print("Training for %d episodes ..." % args.train_episodes)
        start_time = time.time()
    for session in range(args.train_episodes):
        reset()
        pattern = random.randrange(len(patterns))
        pattern_func = patterns[pattern]
        for step in range(args.episode_length):
            angle = pattern_func(env.angle * 180 / math.pi,
                                 motion.age) * math.pi / 180
            env.move(angle)
            if env.collision:
                reset()
                continue
            compute()
            trajectory_sdrc.compute(trajectory.age,
                                    trajectory.learning.flat_index,
                                    classification={
                                        "bucketIdx": pattern,
                                        "actValue": pattern
                                    },
                                    learn=True,
                                    infer=False)
            motion_sdrc.compute(motion.age,
                                motion.columns.flat_index,
                                classification={
                                    "bucketIdx": pattern,
                                    "actValue": pattern
                                },
                                learn=True,
                                infer=False)
            if verbose and motion.age % 10000 == 0:
                print("Cycle %d" % motion.age)
        if args.show_pattern:
            env.plot_course()

    if verbose:
        train_time = time.time() - start_time
        start_time = time.time()
        print("Elapsed time (training): %d seconds." % int(round(train_time)))
        print("")
        print("Trajectory", trajectory.statistics())
        print("Motion", motion.statistics())
        print("")

    # Test
    if verbose:
        print("Testing for %d episodes ..." % args.test_episodes)
    if args.move_env:
        env_offset = np.array([9 * env.size, 9 * env.size])
        if verbose:
            print("Moved to new environment.")
    trajectory_accuracy = 0
    motion_accuracy = 0
    sample_size = 0
    trajectory_confusion = np.zeros((len(patterns), len(patterns)))
    motion_confusion = np.zeros((len(patterns), len(patterns)))
    for episode in range(args.test_episodes):
        reset()
        pattern = random.randrange(len(patterns))
        pattern_func = patterns[pattern]
        for step in range(args.episode_length):
            angle = pattern_func(env.angle * 180 / math.pi,
                                 motion.age) * math.pi / 180
            env.move(angle)
            if env.collision:
                reset()
                continue
            compute(learn=True)
            trajectory_inference = trajectory_sdrc.infer(
                trajectory.learning.flat_index, None)[0]
            if pattern == np.argmax(trajectory_inference):
                trajectory_accuracy += 1
            trajectory_confusion[pattern][np.argmax(trajectory_inference)] += 1
            motion_inference = motion_sdrc.infer(motion.columns.flat_index,
                                                 None)[0]
            if pattern == np.argmax(motion_inference):
                motion_accuracy += 1
            motion_confusion[pattern][np.argmax(motion_inference)] += 1
            sample_size += 1
    trajectory_accuracy /= sample_size
    motion_accuracy /= sample_size
    if verbose:
        print("Trajectory Accuracy %g, %d catagories." %
              (trajectory_accuracy, len(patterns)))
        print("Motion Accuracy     %g" % motion_accuracy)

    # Display Confusion Matrices
    if verbose:
        conf_matrices = (
            trajectory_confusion,
            motion_confusion,
        )
        conf_titles = (
            'Trajectory',
            'Motion',
        )
        #
        plt.figure("Pattern Recognition Confusion")
        for subplot_idx, matrix_title in enumerate(
                zip(conf_matrices, conf_titles)):
            matrix, title = matrix_title
            plt.subplot(1, len(conf_matrices), subplot_idx + 1)
            plt.title(title + " Confusion")
            matrix_sum = np.sum(matrix, axis=1)
            matrix_sum[matrix_sum == 0] = 1
            matrix = (matrix.T / matrix_sum).T
            plt.imshow(matrix, interpolation='nearest')
            plt.xlabel('Prediction')
            plt.ylabel('Label')

    if synapses_debug:
        gc.synapses.check_data_integrity()
        trajectory.synapses.check_data_integrity()
        motion.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    if verbose:
        test_time = time.time() - start_time
        print("Elapsed time (testing): %d seconds." % int(round(test_time)))
        plt.show()
    return motion_accuracy
Example #25
  printTPRegionParams(model._getTPRegion())

  inputData = "%s/%s.csv" % (DATA_DIR, dataSet.replace(" ", "_"))

  sensor = model._getSensorRegion()
  encoderList = sensor.getSelf().encoder.getEncoderList()
  if sensor.getSelf().disabledEncoder is not None:
    classifier_encoder = sensor.getSelf().disabledEncoder.getEncoderList()
    classifier_encoder = classifier_encoder[0]
  else:
    classifier_encoder = None

  # initialize new SDR classifier
  numTMcells = model._getTPRegion().getSelf()._tfdr.numberOfCells()
  sdrClassifier = SDRClassifier(steps=[5], alpha=0.005)

  _METRIC_SPECS = getMetricSpecs(predictedField, stepsAhead=_options.stepsAhead)
  metric = metrics.getModule(_METRIC_SPECS[0])
  metricsManager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(),
                                  model.getInferenceType())

  if plot:
    plotCount = 1
    plotHeight = max(plotCount * 3, 6)
    fig = plt.figure(figsize=(14, plotHeight))
    gs = gridspec.GridSpec(plotCount, 1)
    plt.title(predictedField)
    plt.ylabel('Data')
    plt.xlabel('Time')
    plt.tight_layout()
Example #26
def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--time', type=int, default=20,
                        help='Number of times to run through the training data.')
    parser.add_argument('--dataset', choices=('states', 'dictionary', 'gutenberg'),
        default='states')
    parser.add_argument('--words', type=int, default=500,
        help='Number of words to use.')
    parser.add_argument('--typo', type=float, default=0.,
        help='Misspell words, percentage [0-1], default 0.')
    parser.add_argument('--practice', type=int, default=0,
        help='Makes the task easier by repeating words.')
    parser.add_argument('--learned_stability', action='store_true',
        help='Disable the stability mechanism during tests.')
    parser.add_argument('--disable_tm_sdrc', action='store_true',)
    args = parser.parse_args(args = argv)

    assert(parameters['tp_nz_value'] > 0)

    if verbose:
        print("Parameters = ", end='')
        import pprint
        pprint.pprint(parameters)
        print("")

    # Load dataset.  The dataset consists of three variables:
    # 1) training_data is a list of words.
    # 2) testing_data is a list of words.
    # 3) dataset is dictionary of word -> identifier pairs.
    if args.dataset == 'states':
        # Remove the spaces from the two-word state names.
        dataset       = [word.replace(' ', '') for word in state_names]
        training_data = dataset * args.time
        testing_data  = dataset * 5
        random.shuffle(training_data)
        random.shuffle(testing_data)
        if verbose:
            print("Dataset is %d state names."%len(dataset))
    elif args.dataset == 'dictionary':
        dataset       = read_dictionary()
        dataset       = random.sample(dataset, args.words)
        training_data = dataset * args.time
        testing_data  = dataset * 5
        random.shuffle(training_data)
        random.shuffle(testing_data)
        if verbose:
            print("Dataset is %d dictionary words."%len(dataset))
    elif args.dataset == 'gutenberg':
        text          = read_gutenberg(args.time)
        split         = int(.80 * len(text))    # Fraction of data to train on.
        training_data = text[ : split]
        testing_data  = text[split : ]
        # Put the most common words into the dataset to be trained & tested on.
        histogram     = {}
        for word in training_data:
            if word not in histogram:
                histogram[word] = 0
            histogram[word] += 1
        histogram.pop('S', None)    # Remove apostrophe 'S'.
        dataset = sorted(histogram, key = lambda word: histogram[word])
        dataset = dataset[ -args.words : ]
        if verbose:
            print("Dataset is %d words from Project Gutenberg."%len(dataset))
            unique_train = len(set(training_data))
            unique_test  = len(set(testing_data))
            print("Unique words in training data %d, testing data %d"%(unique_train, unique_test))

    dataset = {word: idx for idx, word in enumerate(sorted(set(dataset)))}
    if verbose:
        print("Training data %d words, %g%% dataset coverage."%(
            len(training_data),
            100. * sum(1 for w in training_data if w in dataset) / len(dataset)))
        print("Testing data %d words, %g%% dataset coverage."%(
            len(testing_data),
            100. * sum(1 for w in testing_data if w in dataset) / len(dataset)))
        print("Dataset: " + ", ".join('%d) %s'%(dataset[word], word) for word in sorted(dataset)))

    if args.practice:
        insertion_point  = int(len(training_data) / 2)
        practice_dataset = list(dataset)
        random.shuffle(practice_dataset)
        for word in practice_dataset:
            for attempt in range(args.practice):
                training_data.insert(insertion_point, word)

    # Construct TM.
    diagnostics_alpha = parameters['sp']['boosting_alpha']
    enc = EnumEncoder(**parameters['enc'])
    enc.output_sdr = SDR(enc.output_sdr, average_overlap_alpha = diagnostics_alpha)
    sp = SpatialPooler(
        input_sdr         = enc.output_sdr,
        **parameters['sp'])
    tm = TemporalMemory(
        column_sdr        = sp.columns,
        context_sdr       = SDR((parameters['tp']['mini_columns'],)),
        anomaly_alpha     = diagnostics_alpha,
        **parameters['tm'])
    if not args.disable_tm_sdrc:
        tm_sdrc = SDRClassifier(steps=[0], **parameters['tm_sdrc'])
        tm_sdrc.compute(-1, [tm.active.size-1],    # Initialize the SDRCs internal table.
            classification={"bucketIdx": [len(dataset)-1], "actValue": [len(dataset)-1]},
            learn=True, infer=False)
    tp = StableSpatialPooler(
        input_sdr         = tm.active,
        macro_columns     = (1,),
        **parameters['tp'])
    tp_sdrc = SDRClassifier(steps=[0], **parameters['tp_sdrc'])
    tp_sdrc.compute(-1, [tp.columns.size-1],    # Initialize the SDRCs internal table.
        classification={"bucketIdx": [len(dataset)-1], "actValue": [len(dataset)-1]},
        learn=True, infer=False)

    def reset():
        enc.output_sdr.zero()
        sp.reset()
        tm.reset()
        tp.reset()

    def compute(char, learn):
        enc.encode(char)
        sp.compute(learn=learn)
        tm.context_sdr.flat_index = tp.columns.flat_index
        tm.context_sdr.nz_values.fill(parameters['tp_nz_value'])
        tm.compute(learn=learn)
        tp.compute(learn=learn,
            input_learning_sdr = tm.learning,)

    # TRAIN
    if verbose:
        train_cycles = sum(len(w) for w in training_data)
        iterations   = len(training_data) / len(dataset)
        print("Training for %d cycles (%d dataset iterations)"%(train_cycles, iterations))

    reset()
    for word in training_data:
        for idx, char in enumerate(word):
            compute(char, learn=True)
        # Process each word before training on the final character.
        try:
            label = dataset[word]
        except KeyError:
            continue
        if len(tm.learning) and not args.disable_tm_sdrc:
            tm_sdrc.compute(tm.age, tm.learning.flat_index,
                classification={"bucketIdx": label, "actValue": label},
                learn=True, infer=False)
        if len(tp.columns):
            tp_sdrc.compute(tp.age, tp.columns.flat_index,
                classification={"bucketIdx": label, "actValue": label},
                learn=True, infer=False)

    if verbose:
        print("Done training.  System statistics:")
        print("")
        print("Encoder", enc.output_sdr.statistics())
        print(sp.statistics())
        print(tm.statistics())
        print(tp.statistics())
        print("")

    # TEST
    # Make some new words which the system has never seen before.
    if verbose:
        random_words = []
        for word in dataset:
            alphabet    = [chr(ord('A') + i) for i in range(26)]
            random_word = ''.join(random.choice(alphabet) for c in word)
            random_words.append(random_word)
        print("Novel Words Dataset: " + ', '.join(random_words))
        print("")

        # Measure response to new random words.
        rand_word_tp_ovlp = 0.
        n_samples         = 0
        for word in random_words:
            reset()
            response = []
            for char in word:
                compute(char, learn = False)
                response.append(SDR(tp.columns))
            for sdr_a, sdr_b in itertools.combinations(response, 2):
                rand_word_tp_ovlp += sdr_a.overlap(sdr_b)
                n_samples += 1
        rand_word_tp_ovlp /= n_samples
        print("Novel Words (Isolated), Average Overlap Within Word %g %%"%(100 * rand_word_tp_ovlp))

        # Measure response to new random words, with the stability mechanism
        # turned off.
        stability_rate = tp.stability_rate
        tp.stability_rate = 1.
        rand_word_tp_ovlp_no_stab = 0.
        for word in random_words:
            reset()
            response = []
            for char in word:
                compute(char, learn = False)
                response.append(SDR(tp.columns))
            for sdr_a, sdr_b in itertools.combinations(response, 2):
                rand_word_tp_ovlp_no_stab += sdr_a.overlap(sdr_b)
        rand_word_tp_ovlp_no_stab /= n_samples
        tp.stability_rate = stability_rate
        print("Novel Words (Isolated), No Stability Mechanism, Avg Ovlp Within Word %g %%"%(100 * rand_word_tp_ovlp_no_stab))

        # Compare new word response to that of randomly generated SDRs.
        rand_sdr_ovlp = 0.
        tp_n_active   = len(tp.columns)
        for i in range(n_samples):
            sdr_a = SDR(tp.columns)
            sdr_b = SDR(tp.columns)
            sdr_a.flat_index = np.array(random.sample(range(tp.columns.size), tp_n_active))
            sdr_b.flat_index = np.array(random.sample(range(tp.columns.size), tp_n_active))
            rand_sdr_ovlp += sdr_a.overlap(sdr_b)
        rand_sdr_ovlp /= n_samples
        print("Random Comparable SDR(n=%d sparsity=%g%%), Average Overlap %g %%"%(
            tp.columns.size,
            100 * tp_n_active / tp.columns.size,
            100 * rand_sdr_ovlp),)
        print("")

    if args.learned_stability:
        tp.stability_rate = 1
        if verbose:
            print("")
            print("Disabled Stability Mechanism...")
            print("")

    # Measure response to each word in isolation.
    if verbose:
        catagories   = {word : [] for word in dataset}
        tm_accuacy   = 0.
        tp_accuacy   = 0.
        n_samples    = 0
        for word, word_id in dataset.items():
            reset()
            for char in word:
                compute(char, learn = False)
                catagories[word].append(SDR(tp.columns))
            if not args.disable_tm_sdrc:
                try:
                    tm_inference = tm_sdrc.infer(tm.active.flat_index, None)[0]
                except IndexError:
                    tm_inference = np.random.random(size=len(dataset))
                tm_accuacy += word_id == np.argmax(tm_inference)
            try:
                tp_inference = tp_sdrc.infer(tp.columns.flat_index, None)[0]
            except IndexError:
                tp_inference = np.random.random(size=len(dataset))
            tp_accuacy += word_id == np.argmax(tp_inference)
            n_samples  += 1
        tm_accuacy /= n_samples
        tp_accuacy /= n_samples
        print("")
        print("Isolated Word Stability / Distinctiveness:")
        stability, distinctiveness, stability_metric = measure_inter_intra_overlap(catagories, verbose=verbose)
        print("Temporal Memory Classifier Accuracy %g %% (%d samples)"%(100 * tm_accuacy, n_samples))
        print("Temporal Pooler Classifier Accuracy %g %% (%d samples)"%(100 * tp_accuacy, n_samples))
        print("")

    # Measure response to words in context.  Measure the overlap between the
    # same words in different contexts.  Also check the classifier accuracy.
    catagories   = {word : [] for word in dataset}
    tm_accuacy   = 0.
    tp_accuacy   = 0.
    tm_confusion = np.zeros((len(dataset), len(dataset)))
    tp_confusion = np.zeros((len(dataset), len(dataset)))
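    # Confusion matrices: row index is the true word id, column index is the
    # predicted word id; each row accumulates the classifier's normalized
    # likelihoods rather than hard argmax votes.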
    n_samples    = 0
    reset()
    for word in testing_data:
        if random.random() < args.typo:
            mutated_word = mutate_word(word)
        else:
            mutated_word = word

        for char in mutated_word:
            compute(char, learn = False)
            if word in catagories:
                catagories[word].append(SDR(tp.columns))

        # Check Classifier Accuracy.
        try:
            word_id = dataset[word]
        except KeyError:
            continue
        if not args.disable_tm_sdrc:
            try:
                tm_inference = tm_sdrc.infer(tm.active.flat_index, None)[0]
            except IndexError:
                tm_inference = np.random.random(size=len(dataset))
            tm_accuacy += word_id == np.argmax(tm_inference)
            tm_confusion[word_id] += tm_inference / np.sum(tm_inference)
        try:
            tp_inference = tp_sdrc.infer(tp.columns.flat_index, None)[0]
        except IndexError:
            tp_inference = np.random.random(size=len(dataset))
        tp_accuacy += word_id == np.argmax(tp_inference)
        tp_confusion[word_id] += tp_inference / np.sum(tp_inference)
        n_samples  += 1
    tm_accuacy /= n_samples
    tp_accuacy /= n_samples
    if verbose:
        print("")
        print("In-Context Word Stability / Distinctiveness:")
    stability, distinctiveness, stability_metric = measure_inter_intra_overlap(catagories, verbose=verbose)
    if verbose:
        print("Temporal Memory Classifier Accuracy %g %% (%d samples)"%(100 * tm_accuacy, n_samples))
        print("Temporal Pooler Classifier Accuracy %g %% (%d samples)"%(100 * tp_accuacy, n_samples))

    score = (stability * tm_accuacy * tp_accuacy)
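    # The overall score is the product of the in-context stability and both
    # classifier accuracies, so a collapse in any one of them drives the score
    # toward zero.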
    if verbose:
        print("Score: %g"%score)

    # Display Confusion Matrices
    if verbose:
        conf_matrices = (tm_confusion, tp_confusion,)
        conf_titles   = ('Temporal Memory', 'Temporal Pooler',)
        #
        import matplotlib.pyplot as plt
        plt.figure("Word Recognition Confusion")
        for subplot_idx, (matrix, title) in enumerate(zip(conf_matrices, conf_titles)):
            plt.subplot(1, len(conf_matrices), subplot_idx + 1)
            plt.title(title + " Confusion")
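            # Normalize each prediction column so it sums to 1 across the true
            # labels before plotting.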
            matrix /= np.sum(matrix, axis=0)
            plt.imshow(matrix, interpolation='nearest')
            plt.xlabel('Prediction')
            plt.ylabel('Label')
            for label, idx in dataset.items():
                plt.text(idx, len(dataset) + .5, label, rotation='vertical',
                    horizontalalignment='center', verticalalignment='bottom')
                plt.text(-1.5, idx, label,
                    horizontalalignment='left', verticalalignment='center')

    # Show a sample of input.
    if verbose:
        sentance        = []
        boundries       = []
        anomaly_hist    = []
        stability_hist  = []
        tp_active_hist  = []
        tp_class_hist   = []
        tp_prev_active  = SDR(tp.columns.dimensions)
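        # tp_prev_active holds the previous step's output so that
        # stability_hist can record the step-to-step overlap of the pooler.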
        n_samples       = 0
        sample_data     = testing_data[ : 100]
        reset()
        for word in sample_data:
            if random.random() < args.typo:
                mutated_word = mutate_word(word)
            else:
                mutated_word = word

            for index, char in enumerate(mutated_word):
                compute(char, learn = False)
                try:
                    tp_inference = np.argmax(tp_sdrc.infer(tp.columns.flat_index, None)[0])
                except IndexError:
                    tp_inference = random.choice(range(len(dataset)))
                tp_class_hist.append(tp_inference)
                if index == 0:
                    boundries.append(n_samples)
                sentance.append(char)
                anomaly_hist.append(tm.anomaly)
                tp_active_hist.append(SDR(tp.columns))
                stability_hist.append(tp.columns.overlap(tp_prev_active))
                tp_prev_active = SDR(tp.columns)
                n_samples += 1

        plt.figure("ASCII Stability")
        stability_weighted = overlap_stability_weighted(tp_active_hist)
        plt.plot(
                 # np.arange(n_samples)+.5, anomaly_hist,   'ro',
                 # np.arange(n_samples)+.5, stability_hist, 'b-',
                 np.arange(n_samples)+.5, stability_weighted, 'b-',)
        for idx, char in enumerate(sentance):
            plt.text(idx + .5, .01, char, horizontalalignment='center')
        for x in boundries:
            plt.axvline(x, color='k')
        sorted_dataset = sorted(dataset)
        for idx, word_id in enumerate(tp_class_hist):
            word = sorted_dataset[word_id]
            plt.text(idx + .5, 1., word,
                rotation            = 90,
                horizontalalignment = 'center',
                verticalalignment   = 'top',)
        figure_title = "Output Layer Stability"
        if args.learned_stability:
            figure_title += " - Stability Mechanism Disabled."
        figure_title += "\nInput character at bottom, Classification at top, Vertical lines are word boundaries."
        plt.title(figure_title)
        plt.ylabel('Stability')
        plt.xlabel('Time step')
        plt.show()

    if synapses_debug:
        sp.synapses.check_data_integrity()
        tm.synapses.check_data_integrity()
        tp.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    return score
Example #27
def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--time', type=int, default=5,
                        help='Number of times to run through the training data.')
    parser.add_argument('--dataset', choices=('states', 'dictionary'), default='states')
    args = parser.parse_args(args = argv)

    # Load data.
    if args.dataset == 'states':
        dataset = state_names
        if verbose:
            print("Dataset is %d state names"%len(dataset))
    elif args.dataset == 'dictionary':
        dataset = read_dictionary()
        dataset = random.sample(dataset, 500)
        if verbose:
            print("Dataset is dictionary words, sample size %d"%len(dataset))

    dataset   = sorted(dataset)
    word_ids  = {word: idx for idx, word in enumerate(sorted(dataset))}
    confusion = np.zeros((len(dataset), len(dataset)))
    if verbose:
        print("Dataset: " + ", ".join('%d) %s'%idx_word for idx_word in enumerate(dataset)))

    # Construct TM.
    diagnostics_alpha = parameters['sp']['boosting_alpha']
    enc = EnumEncoder(**parameters['enc'])
    enc.output_sdr = SDR(enc.output_sdr, average_overlap_alpha = diagnostics_alpha)
    sp = SpatialPooler(
        input_sdr         = enc.output_sdr,
        **parameters['sp'])
    tm = TemporalMemory(
        column_sdr        = sp.columns,
        anomaly_alpha     = diagnostics_alpha,
        **parameters['tm'])
    sdrc = SDRClassifier(steps=[0], **parameters['tm_sdrc'])
    sdrc.compute(-1, [tm.active.size-1],    # Initialize the table.
        classification={"bucketIdx": [len(dataset)-1], "actValue": [len(dataset)-1]},
        learn=True, infer=False)

    def reset():
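        # Clear all per-sequence state (encoder output, spatial pooler, and
        # temporal memory) so each word is presented as an independent sequence.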
        enc.output_sdr.zero()
        sp.reset()
        tm.reset()

    # Train.
    if verbose:
        train_cycles = args.time * sum(len(w) for w in dataset)
        print("Training for %d cycles (%d dataset iterations)"%(train_cycles, args.time))
    for i in range(args.time):
        random.shuffle(dataset)
        for word in dataset:
            reset()
            for idx, char in enumerate(word):
                enc.encode(char)
                sp.compute()
                tm.compute()
            lbl = word_ids[word]
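            # Train the 0-step classifier once per word, on the TM learning
            # cells left after the final character of the word.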
            sdrc.compute(tm.age, tm.learning.flat_index,
                classification={"bucketIdx": lbl, "actValue": lbl},
                learn=True, infer=False)

    if verbose:
        print("Encoder", enc.output_sdr.statistics())
        print(sp.statistics())
        print(tm.statistics())

    # Test.
    score = 0.
    score_samples = 0
    for word in dataset:
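        # Present each word character by character, then classify from the TM's
        # active cells and add the soft prediction vector to the word's
        # confusion row.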
        reset()
        for idx, char in enumerate(word):
            enc.encode(char)
            sp.compute(learn = False)
            tm.compute(learn = False)

        inference = sdrc.infer(tm.active.flat_index, None)
        lbl = word_ids[word]
        if lbl == np.argmax(inference[0]):
            score += 1
        score_samples += 1
        confusion[lbl] += inference[0]
    print("Score:", 100. * score / score_samples, '%')

    if synapses_debug:
        tm.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    if verbose:
        import matplotlib.pyplot as plt
        plt.figure('Confusion Matrix')
        plt.imshow(confusion, interpolation='nearest')
        plt.xlabel('Prediction')
        plt.ylabel('Label')
        plt.show()

    return score / score_samples
Example #28
def main(parameters=default_parameters, argv=None, verbose=True):
    # Setup
    num_objects = 100
    object_sizes = range(20, 40 + 1)
    train_iterations = 100
    test_iterations = 5
    steps_per_object = range(3, 17 + 1)
    inputs, objects = object_dataset(num_objects, object_sizes)

    enc = EnumEncoder(2400, 0.02)
    enc.output_sdr = SDR(
        enc.output_sdr,
        activation_frequency_alpha=parameters['boosting_alpha'],
        average_overlap_alpha=parameters['boosting_alpha'],
    )

    sp = StableSpatialPooler(input_sdr=enc.output_sdr,
                             macro_columns=(1, ),
                             **parameters)
    sdrc = SDRClassifier(steps=[0])
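    # steps=[0] trains the classifier to label the current time step rather
    # than to predict future steps.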

    def measure_catagories():
        # Compute every sensation for every object.
        objects_columns = []
        for obj in objects:
            objects_columns.append([])
            for sensation in obj:
                sp.reset()
                enc.encode(sensation)
                sp.compute(learn=False)
                objects_columns[-1].append(SDR(sp.columns))
        sp.reset()
        return objects_columns

    if verbose:
        print("Num-Inputs  ", len(set(itertools.chain.from_iterable(objects))))
        print('Num-Objects ', num_objects)
        print("Object-Sizes", object_sizes)
        print("Steps/Object", steps_per_object)
        print(sp.statistics())
        objects_columns = measure_catagories()
        measure_inter_intra_overlap(objects_columns, verbose)
        print("")

        # TRAIN
        train_time = train_iterations * num_objects * np.mean(steps_per_object)
        print('TRAINING for ~%d Cycles (%d dataset iterations) ...' %
              (train_time, train_iterations))
        print("")

    sp.reset()
    t = 0
    for iteration in range(train_iterations):
        object_order = list(range(num_objects))
        random.shuffle(object_order)
        for object_id in object_order:
            for step in range(random.choice(steps_per_object)):
                sensation = random.choice(objects[object_id])
                enc.encode(sensation)
                sp.compute()
                try:
                    sdrc.compute(t,
                                 sp.columns.flat_index,
                                 classification={
                                     "bucketIdx": object_id,
                                     "actValue": object_id,
                                 },
                                 learn=True,
                                 infer=False)
                except ValueError:
                    print("Warning: len(active) = %d." % (len(sp.columns)))
                t += 1

    if verbose:
        print("TESTING ...")
        print("")
        print('Encoder Output', enc.output_sdr.statistics())
        print(sp.statistics())

    objects_columns = measure_catagories()
    _, __, stability_metric = measure_inter_intra_overlap(
        objects_columns, verbose)

    # Measure classification accuracy.  The test presents every object a few
    # times and classifies it; the model is evaluated on every cycle.
    score = 0
    max_score = 0
    sp.reset()
    if verbose:
        print("")
        print("Test length: %d dataset iterations." % (test_iterations))
    test_data = list(range(num_objects))
    for iteration in range(test_iterations):
        random.shuffle(test_data)
        for object_id in test_data:
            for step in range(random.choice(steps_per_object)):
                sensation = random.choice(objects[object_id])
                enc.encode(sensation)
                sp.compute(learn=True)
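                # The pooler keeps learning during the test, but the classifier
                # is only queried, never updated.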
                inference = sdrc.infer(sp.columns.flat_index, None)[0]
                inference = np.argmax(inference)
                if inference == object_id:
                    score += 1
                max_score += 1
    if verbose:
        print('Classification Accuracy: %g %%' % (100 * score / max_score))

    if synapses_debug:
        sp.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    return stability_metric + 10 * (score / max_score)
Example #29
    def _doWriteReadChecks(self, computeBeforeSerializing):
        c1 = SDRClassifier([0], 0.1, 0.1, 0)

        # Create a vector of input bit indices
        input1 = [1, 5, 9]
        if computeBeforeSerializing:
            result = c1.compute(recordNum=0,
                                patternNZ=input1,
                                classification={
                                    'bucketIdx': 4,
                                    'actValue': 34.7
                                },
                                learn=True,
                                infer=True)

        proto1 = SdrClassifier_capnp.SdrClassifierProto.new_message()
        c1.write(proto1)

        # Write the proto to a temp file and read it back into a new proto
        with tempfile.TemporaryFile() as f:
            proto1.write(f)
            f.seek(0)
            proto2 = SdrClassifier_capnp.SdrClassifierProto.read(f)

        # Load the deserialized proto
        c2 = SDRClassifier.read(proto2)
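        # The assertions below check that every piece of internal state
        # (steps, learning rates, pattern history, weight matrices, bucket
        # bookkeeping, actual values, version, verbosity) survives the
        # Cap'n Proto round trip.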

        self.assertEqual(c1.steps, c2.steps)
        self.assertEqual(c1._maxSteps, c2._maxSteps)
        self.assertAlmostEqual(c1.alpha, c2.alpha)
        self.assertAlmostEqual(c1.actValueAlpha, c2.actValueAlpha)
        self.assertEqual(c1._patternNZHistory, c2._patternNZHistory)
        self.assertEqual(list(c1._weightMatrix.keys()),
                         list(c2._weightMatrix.keys()))
        for step in list(c1._weightMatrix.keys()):
            c1Weight = c1._weightMatrix[step]
            c2Weight = c2._weightMatrix[step]
            self.assertSequenceEqual(list(c1Weight.flatten()),
                                     list(c2Weight.flatten()))
        self.assertEqual(c1._maxBucketIdx, c2._maxBucketIdx)
        self.assertEqual(c1._maxInputIdx, c2._maxInputIdx)
        self.assertEqual(len(c1._actualValues), len(c2._actualValues))
        for i in range(len(c1._actualValues)):
            self.assertAlmostEqual(c1._actualValues[i], c2._actualValues[i], 5)
        self.assertEqual(c1._version, c2._version)
        self.assertEqual(c1.verbosity, c2.verbosity)

        # NOTE: the previous step's actual values determine the size of lists in
        # results
        expectedActualValuesLen = len(c1._actualValues)

        result1 = c1.compute(recordNum=1,
                             patternNZ=input1,
                             classification={
                                 'bucketIdx': 4,
                                 'actValue': 34.7
                             },
                             learn=True,
                             infer=True)
        result2 = c2.compute(recordNum=1,
                             patternNZ=input1,
                             classification={
                                 'bucketIdx': 4,
                                 'actValue': 34.7
                             },
                             learn=True,
                             infer=True)

        self.assertEqual(list(result1.keys()), list(result2.keys()))

        for key in list(result1.keys()):
            self.assertEqual(len(result1[key]), len(result2[key]))
            self.assertEqual(len(result1[key]), expectedActualValuesLen)

            for i in range(expectedActualValuesLen):
                self.assertAlmostEqual(result1[key][i], result2[key][i], 5)