def testSerialization5(self):
    # This test verifies that saveToFile() and loadFromFile() with JSON on
    # Predictor are accessible from Python.
    inputData  = SDR( 1000 ).randomize( 0.02 )
    categories = { 'A': 0, 'B': 1, 'C': 2, 'D': 3 }
    c1 = Predictor( steps=[1], alpha=1.0 )
    c1.learn(1, inputData, categories['B'] )

    file = "Predictor_test_save.JSON"
    c1.saveToFile(file, "JSON")

    c2 = Predictor( steps=[1], alpha=1.0 )
    c2.loadFromFile(file, "JSON")
    os.remove(file)
def testMultistepSingleValue(self):
    classifier = Predictor(steps=[1, 2])
    inp = SDR(10)
    inp.randomize(.2)

    for recordNum in range(10):
        classifier.learn(recordNum, inp, 0)

    retval = classifier.infer(10, inp)

    # Should have a probability of 100% for that bucket.
    self.assertEqual(retval[1], [1.])
    self.assertEqual(retval[2], [1.])
def testSingleValue0Steps(self):
    """Send the same value 10 times and expect a high likelihood for the
    prediction using 0-step ahead prediction."""
    pred = Predictor(steps=[0], alpha=0.5)

    # Enough times to perform inference and learn associations.
    inp = SDR(10)
    inp.randomize(.2)

    for recordNum in range(10):
        pred.learn(recordNum, inp, 2)

    retval = pred.infer(10, inp)
    self.assertGreater(retval[0][2], 0.9)
def testMultistepSimple(self):
    classifier = Predictor(steps=[1, 2], alpha=10.0)
    inp = SDR(10)

    for i in range(100):
        inp.sparse = [i % 10]
        classifier.learn(recordNum=i, pattern=inp, classification=(i % 10))

    retval = classifier.infer(99, inp)

    self.assertGreater(retval[1][0], 0.99)
    for i in range(1, 10):
        self.assertLess(retval[1][i], 0.01)

    self.assertGreater(retval[2][1], 0.99)
    for i in [0] + list(range(2, 10)):
        self.assertLess(retval[2][i], 0.01)
def testComputeInferOrLearnOnly(self):
    c = Predictor([1], 1.0)
    inp = SDR(10)
    inp.randomize( .3 )

    # learn only
    prediction = c.infer(pattern=inp)[1]
    self.assertTrue(prediction == [])  # not enough training data -> []
    c.learn(recordNum=0, pattern=inp, classification=4)
    self.assertTrue(c.infer(pattern=inp)[1] == [])  # not enough training data.
    c.learn(recordNum=2, pattern=inp, classification=4)
    c.learn(recordNum=3, pattern=inp, classification=4)
    self.assertTrue(c.infer(pattern=inp)[1] != [])  # Don't crash with enough training data.

    # infer only
    retval1 = c.infer(pattern=inp)
    retval2 = c.infer(pattern=inp)
    self.assertSequenceEqual(list(retval1[1]), list(retval2[1]))
def testExampleUsage(self):
    # Make a random SDR and associate it with a category.
    inputData  = SDR(1000).randomize(0.02)
    categories = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
    clsr = Classifier()
    clsr.learn(inputData, categories['B'])
    assert(numpy.argmax(clsr.infer(inputData)) == categories['B'])

    # Estimate a scalar value. The Classifier only accepts categories, so
    # put real valued inputs into bins (AKA buckets) by subtracting the
    # minimum value and dividing by a resolution.
    scalar     = 567.8
    minimum    = 500
    resolution = 10
    clsr.learn(inputData, int((scalar - minimum) / resolution))
    assert(numpy.argmax(clsr.infer(inputData)) * resolution + minimum == 560)

    # Predict 1 and 2 time steps into the future.
    # Make a sequence of 4 random SDRs; each SDR has 1000 bits and 2% sparsity.
    sequence = [SDR(1000).randomize(0.02) for i in range(4)]
    # Make category labels for the sequence.
    labels = [4, 5, 6, 7]

    # Make a Predictor and train it.
    pred = Predictor([1, 2])
    pred.learn(0, sequence[0], labels[0])
    pred.learn(1, sequence[1], labels[1])
    pred.learn(2, sequence[2], labels[2])
    pred.learn(3, sequence[3], labels[3])

    # Give the predictor partial information, and make predictions
    # about the future.
    pred.reset()
    A = pred.infer(0, sequence[0])
    assert(numpy.argmax(A[1]) == labels[1])
    assert(numpy.argmax(A[2]) == labels[2])

    B = pred.infer(1, sequence[1])
    assert(numpy.argmax(B[1]) == labels[2])
    assert(numpy.argmax(B[2]) == labels[3])
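# The bucketing arithmetic in testExampleUsage is easy to get backwards, so the
# round trip is pulled out below as a minimal standalone sketch. The helper
# names value_to_bucket / bucket_to_value are illustrative only, not part of
# the htm.core API.
def value_to_bucket(value, minimum=500, resolution=10):
    # Shift by the minimum, then quantize to the bucket width.
    return int((value - minimum) / resolution)

def bucket_to_value(bucket, minimum=500, resolution=10):
    # Invert the quantization; precision below `resolution` is lost.
    return bucket * resolution + minimum

assert value_to_bucket(567.8) == 6
assert bucket_to_value(value_to_bucket(567.8)) == 560  # recovered at bucket granularity, not 567.8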
def testSerialization2(self):
    # This test verifies that pickling a Predictor works.
    SDR1 = SDR(15); SDR1.sparse = [1, 5, 9]
    SDR2 = SDR(15); SDR2.sparse = [0, 6, 9, 11]
    SDR3 = SDR(15); SDR3.sparse = [6, 9]
    SDR4 = SDR(15); SDR4.sparse = [1, 5, 9]

    c1 = Predictor( steps=[1], alpha=1.0 )
    c1.learn(1, pattern=SDR1, classification=4)
    c1.learn(2, pattern=SDR2, classification=5)
    c1.learn(3, pattern=SDR3, classification=5)
    c1.learn(4, pattern=SDR4, classification=4)
    c1.learn(5, pattern=SDR4, classification=4)

    serialized = pickle.dumps(c1)
    c2 = pickle.loads(serialized)

    result1 = c1.infer(SDR1)
    result2 = c2.infer(SDR1)
    self.assertEqual(len(result1[1]), 6)
    self.assertEqual(len(result1[1]), len(result2[1]))
    for i in range(len(result1[1])):
        self.assertAlmostEqual(result1[1][i], result2[1][i], places=5)
def testMultiStepPredictions(self):
    """ Test multi-step predictions.

    We train the 0-step and the 1-step classifiers simultaneously on the
    data stream
        (SDR1, bucketIdx0)
        (SDR2, bucketIdx1)
        (SDR1, bucketIdx0)
        (SDR2, bucketIdx1)
        ...

    We intend the 0-step classifier to learn the associations:
        SDR1 => bucketIdx 0
        SDR2 => bucketIdx 1

    and the 1-step classifier to learn the associations:
        SDR1 => bucketIdx 1
        SDR2 => bucketIdx 0
    """
    c = Predictor([0, 1], 1.0)

    SDR1 = SDR(10); SDR1.sparse = [1, 3, 5]
    SDR2 = SDR(10); SDR2.sparse = [2, 4, 6]

    recordNum = 0
    for _ in range(100):
        c.learn(recordNum, pattern=SDR1, classification=0)
        recordNum += 1
        c.learn(recordNum, pattern=SDR2, classification=1)
        recordNum += 1

    result1 = c.infer(recordNum, SDR1)
    result2 = c.infer(recordNum, SDR2)

    self.assertAlmostEqual(result1[0][0], 1.0, places=1)
    self.assertAlmostEqual(result1[0][1], 0.0, places=1)
    self.assertAlmostEqual(result2[0][0], 0.0, places=1)
    self.assertAlmostEqual(result2[0][1], 1.0, places=1)
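# A minimal, self-contained sketch of reading a Predictor result: infer()
# returns a dict keyed by step size, each entry a probability distribution
# over bucket indices. numpy is assumed imported as in testExampleUsage.
p = Predictor(steps=[0, 1], alpha=1.0)
s = SDR(10); s.sparse = [1, 3, 5]
for r in range(10):
    p.learn(r, s, 0)
result = p.infer(10, s)
for step in sorted(result.keys()):
    print(step, "steps ahead -> most likely bucket:", numpy.argmax(result[step]))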
def building_htm(len_data):
    global enc_info
    global sp_info
    global tm_info
    global anomaly_history
    global predictor
    global predictor_resolution
    global tm
    global sp
    global scalarEncoder
    global encodingWidth
    global dateEncoder

    # Initial message
    print("Building HTM for predicting trends...")

    # Default parameters in HTM
    default_parameters = {
        # There are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay)
        'enc': {
            "value": {'resolution': 0.88, 'size': 700, 'sparsity': 0.02},
            "time": {'timeOfDay': (30, 1)}  # , 'weekend': 21
        },
        'predictor': {'sdrc_alpha': 0.1},
        'sp': {
            'boostStrength': 3.0,
            'columnCount': 1638,
            'localAreaDensity': 0.04395604395604396,
            'potentialPct': 0.85,
            'synPermActiveInc': 0.04,
            'synPermConnected': 0.13999999999999999,
            'synPermInactiveDec': 0.006
        },
        'tm': {
            'activationThreshold': 17,
            'cellsPerColumn': 13,
            'initialPerm': 0.21,
            'maxSegmentsPerCell': 128,
            'maxSynapsesPerSegment': 64,
            'minThreshold': 10,
            'newSynapseCount': 32,
            'permanenceDec': 0.1,
            'permanenceInc': 0.1
        },
        'anomaly': {
            'likelihood': {
                'probationaryPct': 0.1,
                'reestimationPeriod': 100
            }
        }
    }

    # Make the encoders
    print("- Make the encoder")
    dateEncoder = DateEncoder(timeOfDay=default_parameters["enc"]["time"]["timeOfDay"])

    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = default_parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = default_parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = default_parameters["enc"]["value"]["resolution"]
    scalarEncoder = RDSE(scalarEncoderParams)

    encodingWidth = (dateEncoder.size + scalarEncoder.size)
    enc_info = Metrics([encodingWidth], 999999999)

    # Make the SP
    print("- Make the SP")
    spParams = default_parameters["sp"]
    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True)
    sp_info = Metrics(sp.getColumnDimensions(), 999999999)

    # Temporal Memory parameters
    print("- Make the TM")
    tmParams = default_parameters["tm"]
    tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
    tm_info = Metrics([tm.numberOfCells()], 999999999)

    # Setup the Anomaly Likelihood
    print("- Make Anomaly Score/Likelihood")
    anParams = default_parameters["anomaly"]["likelihood"]
    probationaryPeriod = int(math.floor(float(anParams["probationaryPct"]) * len_data))
    learningPeriod = int(math.floor(probationaryPeriod / 2.0))
    anomaly_history = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=probationaryPeriod - learningPeriod,
        reestimationPeriod=anParams["reestimationPeriod"])

    # Make the predictor
    print("- Make the predictor")
    predictor = Predictor(steps=[1, 5], alpha=default_parameters["predictor"]['sdrc_alpha'])
    predictor_resolution = 1

    # End message
    print("- Finish the building of HTM!")
def testInitialization(self):
    Classifier(.1)
    Predictor([2, 3, 4], .1)
def testSerialization(self):
    c = Predictor([1], 1.0)
    c.compute(recordNum=0, patternNZ=[1, 5, 9], classification=4)
    c.compute(recordNum=1, patternNZ=[0, 6, 9, 11], classification=5)
    c.compute(recordNum=2, patternNZ=[6, 9], classification=5)
    c.compute(recordNum=3, patternNZ=[1, 5, 9], classification=4)

    serialized = pickle.dumps(c)
    c = pickle.loads(serialized)

    result = c.compute(recordNum=4, patternNZ=[1, 5, 9], classification=4)

    self.assertEqual(len(result[1]), 6)
    self.assertAlmostEqual(result[1][0], 0.034234, places=5)
    self.assertAlmostEqual(result[1][1], 0.034234, places=5)
    self.assertAlmostEqual(result[1][2], 0.034234, places=5)
    self.assertAlmostEqual(result[1][3], 0.034234, places=5)
    self.assertAlmostEqual(result[1][4], 0.093058, places=5)
    self.assertAlmostEqual(result[1][5], 0.770004, places=5)
def testMissingRecords(self):
    """ Test missing record support.

    Here, we intend the classifier to learn the associations:
        [1,3,5] => bucketIdx 1
        [2,4,6] => bucketIdx 2
        [7,8,9] => don't care

    If it doesn't pay attention to the recordNums in this test, it will learn
    the wrong associations.
    """
    c = Predictor(steps=[1], alpha=1.0)
    recordNum = 0
    inp = SDR(10)

    inp.sparse = [1, 3, 5]
    c.learn(recordNum=recordNum, pattern=inp, classification=0)
    recordNum += 1

    inp.sparse = [2, 4, 6]
    c.learn(recordNum=recordNum, pattern=inp, classification=1)
    recordNum += 1

    inp.sparse = [1, 3, 5]
    c.learn(recordNum=recordNum, pattern=inp, classification=2)
    recordNum += 1

    inp.sparse = [2, 4, 6]
    c.learn(recordNum=recordNum, pattern=inp, classification=1)
    recordNum += 1

    # -------------------------------------------------------------------------
    # At this point, we should have learned [1,3,5] => bucket 1
    #                                       [2,4,6] => bucket 2
    inp.sparse = [1, 3, 5]
    result = c.infer(recordNum=recordNum, pattern=inp)
    c.learn(recordNum=recordNum, pattern=inp, classification=2)
    recordNum += 1
    self.assertLess(result[1][0], 0.1)
    self.assertGreater(result[1][1], 0.9)
    self.assertLess(result[1][2], 0.1)

    inp.sparse = [2, 4, 6]
    result = c.infer(recordNum=recordNum, pattern=inp)
    c.learn(recordNum=recordNum, pattern=inp, classification=1)
    recordNum += 1
    self.assertLess(result[1][0], 0.1)
    self.assertLess(result[1][1], 0.1)
    self.assertGreater(result[1][2], 0.9)

    # -------------------------------------------------------------------------
    # Feed in records that skip and make sure they don't mess up what we
    # learned.
    # If we skip a record, the CLA should NOT learn that [2,4,6] from the
    # previous learn associates with bucket 0.
    recordNum += 1
    inp.sparse = [1, 3, 5]
    result = c.infer(recordNum=recordNum, pattern=inp)
    c.learn(recordNum=recordNum, pattern=inp, classification=0)
    recordNum += 1
    self.assertLess(result[1][0], 0.1)
    self.assertGreater(result[1][1], 0.9)
    self.assertLess(result[1][2], 0.1)

    # If we skip a record, the CLA should NOT learn that [1,3,5] from the
    # previous learn associates with bucket 0.
    recordNum += 1
    inp.sparse = [2, 4, 6]
    result = c.infer(recordNum=recordNum, pattern=inp)
    c.learn(recordNum=recordNum, pattern=inp, classification=0)
    recordNum += 1
    self.assertLess(result[1][0], 0.1)
    self.assertLess(result[1][1], 0.1)
    self.assertGreater(result[1][2], 0.9)

    # If we skip a record, the CLA should NOT learn that [2,4,6] from the
    # previous learn associates with bucket 0.
    recordNum += 1
    inp.sparse = [1, 3, 5]
    result = c.infer(recordNum=recordNum, pattern=inp)
    c.learn(recordNum=recordNum, pattern=inp, classification=0)
    recordNum += 1
    self.assertLess(result[1][0], 0.1)
    self.assertGreater(result[1][1], 0.9)
    self.assertLess(result[1][2], 0.1)
def testComputeComplex(self):
    c = Predictor([1], 1.0)
    inp = SDR(100)

    inp.sparse = [1, 5, 9]
    c.learn(recordNum=0, pattern=inp, classification=4)

    inp.sparse = [0, 6, 9, 11]
    c.learn(recordNum=1, pattern=inp, classification=5)

    inp.sparse = [6, 9]
    c.learn(recordNum=2, pattern=inp, classification=5)

    inp.sparse = [1, 5, 9]
    c.learn(recordNum=3, pattern=inp, classification=4)

    inp.sparse = [1, 5, 9]
    result = c.infer(recordNum=4, pattern=inp)

    self.assertSetEqual(set(result.keys()), set([1]))
    self.assertEqual(len(result[1]), 6)
    self.assertAlmostEqual(result[1][0], 0.034234, places=5)
    self.assertAlmostEqual(result[1][1], 0.034234, places=5)
    self.assertAlmostEqual(result[1][2], 0.034234, places=5)
    self.assertAlmostEqual(result[1][3], 0.034234, places=5)
    self.assertAlmostEqual(result[1][4], 0.093058, places=5)
    self.assertAlmostEqual(result[1][5], 0.770004, places=5)
def testComputeInferOrLearnOnly(self):
    c = Predictor([1], 1.0)
    inp = SDR(10)
    inp.randomize(.3)

    # learn only
    c.infer(recordNum=0, pattern=inp)  # Don't crash with not enough training data.
    c.learn(recordNum=0, pattern=inp, classification=4)
    c.infer(recordNum=1, pattern=inp)  # Don't crash with not enough training data.
    c.learn(recordNum=2, pattern=inp, classification=4)
    c.learn(recordNum=3, pattern=inp, classification=4)

    # infer only
    retval1 = c.infer(recordNum=5, pattern=inp)
    retval2 = c.infer(recordNum=6, pattern=inp)
    self.assertSequenceEqual(list(retval1[1]), list(retval2[1]))
def main(parameters=default_parameters, argv=None, verbose=True):
    if verbose:
        import pprint
        print("Parameters:")
        pprint.pprint(parameters, indent=4)
        print("")

    # Read the input file.
    records = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        headers = next(reader)
        next(reader)
        next(reader)
        for record in reader:
            records.append(record)

    # Make the Encoders. These will convert input data into binary representations.
    dateEncoder = DateEncoder(timeOfDay=parameters["enc"]["time"]["timeOfDay"],
                              weekend=parameters["enc"]["time"]["weekend"])

    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
    scalarEncoder = RDSE(scalarEncoderParams)
    encodingWidth = (dateEncoder.size + scalarEncoder.size)
    enc_info = Metrics([encodingWidth], 999999999)

    # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
    spParams = parameters["sp"]
    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True)
    sp_info = Metrics(sp.getColumnDimensions(), 999999999)

    tmParams = parameters["tm"]
    tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
    tm_info = Metrics([tm.numberOfCells()], 999999999)

    # Setup likelihood; these settings are used in NAB.
    anParams = parameters["anomaly"]["likelihood"]
    probationaryPeriod = int(math.floor(float(anParams["probationaryPct"]) * len(records)))
    learningPeriod = int(math.floor(probationaryPeriod / 2.0))
    anomaly_history = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=probationaryPeriod - learningPeriod,
        reestimationPeriod=anParams["reestimationPeriod"])

    predictor = Predictor(steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'])
    predictor_resolution = 1

    # Iterate through every datum in the dataset, record the inputs & outputs.
    inputs = []
    anomaly = []
    anomalyProb = []
    predictions = {1: [], 5: []}
    for count, record in enumerate(records):

        # Convert the date string into a Python datetime object.
        dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
        # Convert the data value string into a float.
        consumption = float(record[1])
        inputs.append(consumption)

        # Call the encoders to create bit representations for each value. These are SDR objects.
        dateBits = dateEncoder.encode(dateString)
        consumptionBits = scalarEncoder.encode(consumption)

        # Concatenate all these encodings into one large encoding for Spatial Pooling.
        encoding = SDR(encodingWidth).concatenate([consumptionBits, dateBits])
        enc_info.addData(encoding)

        # Create an SDR to represent active columns. This will be populated by the
        # compute method below. It must have the same dimensions as the Spatial Pooler.
        activeColumns = SDR(sp.getColumnDimensions())

        # Execute Spatial Pooling algorithm over input space.
        sp.compute(encoding, True, activeColumns)
        sp_info.addData(activeColumns)

        # Execute Temporal Memory algorithm over active mini-columns.
        tm.compute(activeColumns, learn=True)
        tm_info.addData(tm.getActiveCells().flatten())

        # Predict what will happen, and then train the predictor based on what just happened.
        pdf = predictor.infer(count, tm.getActiveCells())
        for n in (1, 5):
            if pdf[n]:
                predictions[n].append(np.argmax(pdf[n]) * predictor_resolution)
            else:
                predictions[n].append(float('nan'))
        predictor.learn(count, tm.getActiveCells(), int(consumption / predictor_resolution))

        anomalyLikelihood = anomaly_history.anomalyProbability(consumption, tm.anomaly)
        anomaly.append(tm.anomaly)
        anomalyProb.append(anomalyLikelihood)

    # Print information & statistics about the state of the HTM.
    print("Encoded Input", enc_info)
    print("")
    print("Spatial Pooler Mini-Columns", sp_info)
    print(str(sp))
    print("")
    print("Temporal Memory Cells", tm_info)
    print(str(tm))
    print("")

    # Shift the predictions so that they are aligned with the input they predict.
    for n_steps, pred_list in predictions.items():
        for x in range(n_steps):
            pred_list.insert(0, float('nan'))
            pred_list.pop()

    # Calculate the predictive accuracy, Root-Mean-Squared.
    accuracy = {1: 0, 5: 0}
    accuracy_samples = {1: 0, 5: 0}
    for idx, inp in enumerate(inputs):
        for n in predictions:  # For each [N]umber of time steps ahead which was predicted.
            val = predictions[n][idx]
            if not math.isnan(val):
                accuracy[n] += (inp - val) ** 2
                accuracy_samples[n] += 1
    for n in sorted(predictions):
        accuracy[n] = (accuracy[n] / accuracy_samples[n]) ** .5
        print("Predictive Error (RMS)", n, "steps ahead:", accuracy[n])

    # Show info about the anomaly (mean & std).
    print("Anomaly Mean", np.mean(anomaly))
    print("Anomaly Std ", np.std(anomaly))

    # Plot the Predictions and Anomalies.
    if verbose:
        try:
            import matplotlib.pyplot as plt
        except:
            print("WARNING: failed to import matplotlib, plots cannot be shown.")
            return -accuracy[5]

        plt.subplot(2, 1, 1)
        plt.title("Predictions")
        plt.xlabel("Time")
        plt.ylabel("Power Consumption")
        plt.plot(np.arange(len(inputs)), inputs, 'red',
                 np.arange(len(inputs)), predictions[1], 'blue',
                 np.arange(len(inputs)), predictions[5], 'green')
        plt.legend(labels=('Input', '1 Step Prediction, Shifted 1 step',
                           '5 Step Prediction, Shifted 5 steps'))

        plt.subplot(2, 1, 2)
        plt.title("Anomaly Score")
        plt.xlabel("Time")
        plt.ylabel("Power Consumption")
        inputs = np.array(inputs) / max(inputs)
        plt.plot(np.arange(len(inputs)), inputs, 'red',
                 np.arange(len(inputs)), anomaly, 'blue')
        plt.legend(labels=('Input', 'Anomaly Score'))
        plt.show()

    return -accuracy[5]
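# Worked example of the prediction-shifting step in main(): a 1-step prediction
# made at time t describes the input at time t+1, so prepending one NaN and
# dropping the last element aligns predictions with the inputs they predict.
preds = [10.0, 20.0, 30.0]      # 1-step predictions made at t = 0, 1, 2
preds.insert(0, float('nan'))   # no prediction exists for t = 0
preds.pop()                     # -> [nan, 10.0, 20.0]; preds[t] now pairs with inputs[t]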
    permanenceIncrement=0.1,
    permanenceDecrement=0.1,
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=128,
    maxSynapsesPerSegment=64)

records = 1200
probationaryPeriod = int(math.floor(float(0.1) * records))
learningPeriod = int(math.floor(probationaryPeriod / 2.0))
anomaly_history = AnomalyLikelihood(learningPeriod=learningPeriod,
                                    estimationSamples=probationaryPeriod - learningPeriod,
                                    reestimationPeriod=100)
predictor = Predictor(steps=[1, 5], alpha=0.1)
predictor_resolution = 1

inputs = []
anomaly = []
anomalyProb = []
predictions = {1: [], 5: []}

plot = plt.figure(figsize=(25, 15), dpi=60)
warnings.simplefilter('ignore')

for count in range(records):
    dateObject = datetime.datetime.now()
    cp = subprocess.run(['vcgencmd', 'measure_temp'], encoding='utf-8',