def testIssue807():
    # The following should silently pass.  Previous versions segfaulted.
    # See https://github.com/numenta/nupic.core/issues/807 for context
    from htm.bindings.algorithms import TemporalMemory

    tm = TemporalMemory()
    tm.compute(set(), True)
def testNupicTemporalMemoryPickling(self):
    """Test pickling / unpickling of NuPIC TemporalMemory."""
    from htm.bindings.algorithms import TemporalMemory

    # Simple test: make sure that dumping / loading works...
    tm = TemporalMemory(columnDimensions=(16,))

    pickledTm = pickle.dumps(tm)
    tm2 = pickle.loads(pickledTm)

    self.assertEqual(tm.numberOfCells(), tm2.numberOfCells(),
                     "Simple NuPIC TemporalMemory pickle/unpickle failed.")
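# The pickle round trip above is one of two serialization paths used in these
# examples; the other is htm.core's own saveToFile/loadFromFile, which appears
# in LoadDistalSynapses and Layer.save/load below.  A minimal sketch contrasting
# the two (the file name "tm_state.dump" is an arbitrary choice for illustration):
import pickle
from htm.bindings.algorithms import TemporalMemory

tm = TemporalMemory(columnDimensions=(16,))

# In-memory round trip via pickle, as in the test above.
tm2 = pickle.loads(pickle.dumps(tm))

# On-disk round trip via htm.core's binary serialization.
tm.saveToFile("tm_state.dump")
tm3 = TemporalMemory()
tm3.loadFromFile("tm_state.dump")

assert tm.numberOfCells() == tm2.numberOfCells() == tm3.numberOfCells()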
def __init__(self, world, parameters=default_parameters):
    self.world = world
    self.area_size = parameters['num_cells']
    self.num_areas = len(self.world.coordinates)

    # Make an RDSE for every location.
    self.enc = np.zeros(self.world.dims, dtype=object)
    enc_parameters = RDSE_Parameters()
    enc_parameters.size = self.area_size
    enc_parameters.sparsity = parameters['local_sparsity']
    enc_parameters.category = True
    for coords in self.world.coordinates:
        self.enc[coords] = RDSE(enc_parameters)

    # Make empty buffers for the working data.
    self.local = np.zeros(self.world.dims, dtype=object)
    self.gnw = np.zeros(self.world.dims, dtype=object)
    for coords in self.world.coordinates:
        self.local[coords] = SDR((self.area_size,))
        self.gnw[coords] = SDR((self.area_size,))

    # Make an instance of the model at every location.
    self.apical_denrites = np.zeros(self.world.dims, dtype=object)
    self.gnw_size = self.num_areas * self.area_size
    for coords in self.world.coordinates:
        self.apical_denrites[coords] = TemporalMemory(
            [self.area_size],  # columnDimensions
            cellsPerColumn=1,
            externalPredictiveInputs=self.gnw_size,
            seed=0,
            **parameters['apical_denrites'])
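# A hypothetical single-step update for one area of the model above (this helper
# is not part of the original source; `coords` and `value` are assumed to be a
# valid world coordinate and a category value).  It encodes the local observation,
# flattens the global-workspace SDRs, and feeds them to that area's TemporalMemory
# as external predictive input.
def step_area(self, coords, value):
    # Encode the local observation into this area's input SDR.
    self.enc[coords].encode(value, self.local[coords])
    # Flatten the per-area workspace SDRs into one SDR of gnw_size bits.
    gnw_flat = SDR((self.gnw_size,))
    gnw_flat.concatenate([self.gnw[c] for c in self.world.coordinates])
    # Run the area's TemporalMemory with the workspace as external context.
    tm = self.apical_denrites[coords]
    tm.compute(self.local[coords], learn=True,
               externalPredictiveInputsActive=gnw_flat,
               externalPredictiveInputsWinners=gnw_flat)
    return tm.anomaly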
def LoadDistalSynapses(self, layer, column, cell, iteration):
    timeStartDistalSynapsesCalc = time.time()

    tm = TemporalMemory()
    tm.loadFromFile(
        os.path.join(
            os.path.splitext(self.databaseFilePath)[0] + "_distalDump",
            str(layer) + "_" + str(iteration) + ".dump"))

    reqCellID = int(column * self.layers[layer].params['tm_cellsPerColumn'] + cell)
    print("requesting distals for cell:" + str(reqCellID))

    segments = self.getPresynapticCellsForCell(tm, reqCellID)

    segNo = 0
    for seg in segments:  # for each segment
        if cell not in self.layers[layer].distalSynapses.keys():
            self.layers[layer].distalSynapses[cell] = {}
        self.layers[layer].distalSynapses[cell][segNo] = seg  # add numpy array to the dict
        segNo += 1

    return len(segments) > 0  # True if we got something for this cell
class UnivHTMDetector(object):
    """
    This detector uses an HTM based anomaly detection technique.
    """

    def __init__(self, name, probationaryPeriod, smoothingKernelSize, htmParams=None, verbose=False):
        self.useSpatialAnomaly = True
        self.verbose = verbose
        self.name = name  # for logging

        self.probationaryPeriod = probationaryPeriod
        self.parameters = parameters_best

        self.minVal = None
        self.maxVal = None
        self.spatial_tolerance = None

        self.encTimestamp = None
        self.encValue = None
        self.sp = None
        self.tm = None
        self.anomalyLikelihood = None

        # optional debug info
        self.enc_info = None
        self.sp_info = None
        self.tm_info = None

        # for initialization
        self.init_data = []
        self.is_initialized = False
        self.iteration_ = 0

        # for smoothing with gaussian
        self.historic_raw_anomaly_scores = deque(maxlen=smoothingKernelSize)
        self.kernel = None
        self.learningPeriod = None

    def initialize(self, input_min=0, input_max=0):
        # setup spatial anomaly
        if self.useSpatialAnomaly:
            self.spatial_tolerance = self.parameters["spatial_tolerance"]

        ## setup Enc, SP, TM
        # Make the Encoders.  These will convert input data into binary representations.
        self.encTimestamp = DateEncoder(timeOfDay=self.parameters["enc"]["time"]["timeOfDay"])

        scalarEncoderParams = RDSE_Parameters()
        scalarEncoderParams.size = self.parameters["enc"]["value"]["size"]
        scalarEncoderParams.activeBits = self.parameters["enc"]["value"]["activeBits"]
        scalarEncoderParams.resolution = max(0.001, (input_max - input_min) / 130)
        scalarEncoderParams.seed = self.parameters["enc"]["value"]["seed"]

        self.encValue = RDSE(scalarEncoderParams)
        encodingWidth = (self.encTimestamp.size + self.encValue.size)
        self.enc_info = Metrics([encodingWidth], 999999999)

        # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
        # SpatialPooler
        spParams = self.parameters["sp"]
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth,),
            columnDimensions=(spParams["columnDimensions"],),
            potentialRadius=encodingWidth,
            potentialPct=spParams["potentialPct"],
            globalInhibition=spParams["globalInhibition"],
            localAreaDensity=spParams["localAreaDensity"],
            numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
            stimulusThreshold=spParams["stimulusThreshold"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=spParams["wrapAround"],
            minPctOverlapDutyCycle=spParams["minPctOverlapDutyCycle"],
            dutyCyclePeriod=spParams["dutyCyclePeriod"],
            seed=spParams["seed"],
        )
        self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

        # TemporalMemory
        tmParams = self.parameters["tm"]
        self.tm = TemporalMemory(
            columnDimensions=(spParams["columnDimensions"],),
            cellsPerColumn=tmParams["cellsPerColumn"],
            activationThreshold=tmParams["activationThreshold"],
            initialPermanence=tmParams["initialPermanence"],
            connectedPermanence=tmParams["connectedPermanence"],
            minThreshold=tmParams["minThreshold"],
            maxNewSynapseCount=tmParams["maxNewSynapseCount"],
            permanenceIncrement=tmParams["permanenceIncrement"],
            permanenceDecrement=tmParams["permanenceDecrement"],
            predictedSegmentDecrement=tmParams["predictedSegmentDecrement"],
            maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
            maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
            seed=tmParams["seed"],
        )
        self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

        anParams = self.parameters["anomaly"]["likelihood"]
        self.learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = AnomalyLikelihood(
            learningPeriod=self.learningPeriod,
            estimationSamples=self.probationaryPeriod - self.learningPeriod,
            reestimationPeriod=anParams["reestimationPeriod"])

        self.kernel = self._gauss_kernel(self.historic_raw_anomaly_scores.maxlen,
                                         self.historic_raw_anomaly_scores.maxlen)

    def modelRun(self, ts, val):
        """
        Run a single pass through the HTM model.

        @param ts - Timestamp
        @param val - float input value

        @return anomalyScore (smoothed max of spatial and temporal anomaly)
          computed for `val` at this step
        """
        self.iteration_ += 1

        # 0. During the probation period, gather the data and return 0.01.
        if self.iteration_ <= self.probationaryPeriod:
            self.init_data.append((ts, val))
            return 0.01

        if self.is_initialized is False:
            if self.verbose:
                print("[{}] Initializing".format(self.name))
            temp_iteration = self.iteration_
            vals = [i[1] for i in self.init_data]
            self.initialize(input_min=min(vals), input_max=max(vals))
            self.is_initialized = True
            for ts, val in self.init_data:
                self.modelRun(ts, val)
            self.iteration_ = temp_iteration
            if self.verbose:
                print("[{}] Initialization done".format(self.name))

        ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
        # 1. Encoding
        # Call the encoders to create bit representations for each value.  These are SDR objects.
        dateBits = self.encTimestamp.encode(ts)
        valueBits = self.encValue.encode(float(val))
        # Concatenate all these encodings into one large encoding for Spatial Pooling.
        encoding = SDR(self.encTimestamp.size + self.encValue.size).concatenate([valueBits, dateBits])
        self.enc_info.addData(encoding)

        # 2. Spatial Pooler
        # Create an SDR to represent active columns.  This will be populated by the
        # compute method below.  It must have the same dimensions as the Spatial Pooler.
        activeColumns = SDR(self.sp.getColumnDimensions())
        # Execute Spatial Pooling algorithm over input space.
        self.sp.compute(encoding, True, activeColumns)
        self.sp_info.addData(activeColumns)

        # 3. Temporal Memory
        # Execute Temporal Memory algorithm over active mini-columns.
        self.tm.compute(activeColumns, learn=True)
        self.tm_info.addData(self.tm.getActiveCells().flatten())

        # 4. Anomaly
        # handle spatial and contextual (raw, likelihood) anomalies
        # -Spatial
        spatialAnomaly = 0.0
        if self.useSpatialAnomaly:
            # Update min/max values and check if there is a spatial anomaly
            if self.minVal != self.maxVal:
                tolerance = (self.maxVal - self.minVal) * self.spatial_tolerance
                maxExpected = self.maxVal + tolerance
                minExpected = self.minVal - tolerance
                if val > maxExpected or val < minExpected:
                    spatialAnomaly = 1.0
            if self.maxVal is None or val > self.maxVal:
                self.maxVal = val
            if self.minVal is None or val < self.minVal:
                self.minVal = val

        # -Temporal
        raw = self.tm.anomaly
        like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
        logScore = self.anomalyLikelihood.computeLogLikelihood(like)
        temporalAnomaly = logScore

        anomalyScore = max(spatialAnomaly, temporalAnomaly)  # this is the "main" anomaly, compared in NAB

        # 5. Apply smoothing
        self.historic_raw_anomaly_scores.append(anomalyScore)
        historic_scores = np.asarray(self.historic_raw_anomaly_scores)
        convolved = np.convolve(historic_scores, self.kernel, 'valid')
        anomalyScore = convolved[-1]

        return anomalyScore

    @staticmethod
    def estimateNormal(sampleData, performLowerBoundCheck=True):
        """
        :param sampleData:
        :type sampleData: Numpy array.
        :param performLowerBoundCheck:
        :type performLowerBoundCheck: bool
        :returns: A (mean, variance, stdev) tuple describing a normal distribution
          estimated from ``sampleData``.
        """
        mean = np.mean(sampleData)
        variance = np.var(sampleData)
        st_dev = 0

        if performLowerBoundCheck:
            # Handle edge case of almost no deviations and super low anomaly scores.  We
            # find that such low anomaly means can happen, but then the slightest blip
            # of anomaly score can cause the likelihood to jump up to red.
            if mean < 0.03:
                mean = 0.03
            # Catch all for super low variance to handle numerical precision issues
            if variance < 0.0003:
                variance = 0.0003

        # Compute standard deviation
        if variance > 0:
            st_dev = math.sqrt(variance)

        return mean, variance, st_dev

    @staticmethod
    def _calcSkipRecords(numIngested, windowSize, learningPeriod):
        """Return the value of skipRecords for passing to estimateAnomalyLikelihoods.

        If `windowSize` is very large (bigger than the amount of data) then this
        could just return `learningPeriod`.  But when some values have fallen out
        of the historical sliding window of anomaly records, then we have to take
        those into account as well, so we return the `learningPeriod` minus the
        number shifted out.

        :param numIngested - (int) number of data points that have been added to
          the sliding window of historical data points.
        :param windowSize - (int) size of sliding window of historical data points.
        :param learningPeriod - (int) the number of iterations required for the
          algorithm to learn the basic patterns in the dataset and for the anomaly
          score to 'settle down'.
        """
        numShiftedOut = max(0, numIngested - windowSize)
        return min(numIngested, max(0, learningPeriod - numShiftedOut))

    @staticmethod
    def _gauss_kernel(std, size):
        def _norm_pdf(x, mean, sd):
            var = float(sd) ** 2
            denom = (2 * math.pi * var) ** .5
            num = math.exp(-(float(x) - float(mean)) ** 2 / (2 * var))
            return num / denom

        kernel = [2 * _norm_pdf(idx, 0, std) for idx in list(range(-size + 1, 1))]
        kernel = np.array(kernel)
        kernel = np.flip(kernel)
        kernel = kernel / sum(kernel)
        return kernel
class HtmcoreDetector(AnomalyDetector):
    """
    This detector uses an HTM based anomaly detection technique.
    """

    def __init__(self, *args, **kwargs):
        super(HtmcoreDetector, self).__init__(*args, **kwargs)

        ## API for controlling settings of htm.core HTM detector:

        # Set this to False if you want to get results based on raw scores
        # without using AnomalyLikelihood.  This will give worse results, but
        # is useful for checking the efficacy of AnomalyLikelihood.  You will
        # need to re-optimize the thresholds when running with this setting.
        self.useLikelihood = True
        self.useSpatialAnomaly = True
        self.verbose = True

        # Set this to True if you want to use the optimization.
        # If True, it reads the parameters from ./params.json
        # If False, it reads the parameters from ./best_params.json
        self.use_optimization = False

        ## internal members
        # (listed here for easier understanding)
        # initialized in `initialize()`
        self.encTimestamp = None
        self.encValue = None
        self.sp = None
        self.tm = None
        self.anLike = None
        # optional debug info
        self.enc_info = None
        self.sp_info = None
        self.tm_info = None
        # internal helper variables:
        self.inputs_ = []
        self.iteration_ = 0

    def getAdditionalHeaders(self):
        """Returns a list of strings."""
        return ["raw_score"]  # TODO optional: add "prediction"

    def handleRecord(self, inputData):
        """Returns a tuple (anomalyScore, rawScore).

        @param inputData is a dict {"timestamp" : Timestamp(), "value" : float}

        @return tuple (anomalyScore, <any other fields specified in
          `getAdditionalHeaders()`>, ...)
        """
        # Send it to the Numenta detector and get back the results
        return self.modelRun(inputData["timestamp"], inputData["value"])

    def initialize(self):
        # toggle parameters here
        if self.use_optimization:
            parameters = get_params('params.json')
        else:
            parameters = parameters_numenta_comparable

        # setup spatial anomaly
        if self.useSpatialAnomaly:
            # Keep track of value range for spatial anomaly detection
            self.minVal = None
            self.maxVal = None

        ## setup Enc, SP, TM, Likelihood
        # Make the Encoders.  These will convert input data into binary representations.
        self.encTimestamp = DateEncoder(timeOfDay=parameters["enc"]["time"]["timeOfDay"],
                                        weekend=parameters["enc"]["time"]["weekend"])

        scalarEncoderParams = RDSE_Parameters()
        scalarEncoderParams.size = parameters["enc"]["value"]["size"]
        scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
        scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]

        self.encValue = RDSE(scalarEncoderParams)
        encodingWidth = (self.encTimestamp.size + self.encValue.size)
        self.enc_info = Metrics([encodingWidth], 999999999)

        # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
        # SpatialPooler
        spParams = parameters["sp"]
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth,),
            columnDimensions=(spParams["columnCount"],),
            potentialPct=spParams["potentialPct"],
            potentialRadius=encodingWidth,
            globalInhibition=True,
            localAreaDensity=spParams["localAreaDensity"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=True,
        )
        self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

        # TemporalMemory
        tmParams = parameters["tm"]
        self.tm = TemporalMemory(
            columnDimensions=(spParams["columnCount"],),
            cellsPerColumn=tmParams["cellsPerColumn"],
            activationThreshold=tmParams["activationThreshold"],
            initialPermanence=tmParams["initialPerm"],
            connectedPermanence=spParams["synPermConnected"],
            minThreshold=tmParams["minThreshold"],
            maxNewSynapseCount=tmParams["newSynapseCount"],
            permanenceIncrement=tmParams["permanenceInc"],
            permanenceDecrement=tmParams["permanenceDec"],
            predictedSegmentDecrement=0.0,
            maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
            maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        )
        self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

        # setup likelihood; these settings are used in NAB
        if self.useLikelihood:
            anParams = parameters["anomaly"]["likelihood"]
            learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
            self.anomalyLikelihood = AnomalyLikelihood(
                learningPeriod=learningPeriod,
                estimationSamples=self.probationaryPeriod - learningPeriod,
                reestimationPeriod=anParams["reestimationPeriod"])

        # Predictor
        # self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
        # predictor_resolution = 1

        # initialize pandaBaker
        if PANDA_VIS_BAKE_DATA:
            self.BuildPandaSystem(self.sp, self.tm,
                                  parameters["enc"]["value"]["size"],
                                  self.encTimestamp.size)

    def modelRun(self, ts, val):
        """
        Run a single pass through the HTM model.

        @param ts - Timestamp
        @param val - float input value

        @return tuple (anomalyScore, rawScore) computed for `val` at this step
        """
        ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
        self.inputs_.append(val)
        self.iteration_ += 1

        # 1. Encoding
        # Call the encoders to create bit representations for each value.  These are SDR objects.
        dateBits = self.encTimestamp.encode(ts)
        valueBits = self.encValue.encode(float(val))
        # Concatenate all these encodings into one large encoding for Spatial Pooling.
        encoding = SDR(self.encTimestamp.size + self.encValue.size).concatenate([valueBits, dateBits])
        self.enc_info.addData(encoding)

        # 2. Spatial Pooler
        # Create an SDR to represent active columns.  This will be populated by the
        # compute method below.  It must have the same dimensions as the Spatial Pooler.
        activeColumns = SDR(self.sp.getColumnDimensions())
        # Execute Spatial Pooling algorithm over input space.
        self.sp.compute(encoding, True, activeColumns)
        self.sp_info.addData(activeColumns)

        # 3. Temporal Memory
        # Execute Temporal Memory algorithm over active mini-columns.
        # To get predictive cells we need to call activateDendrites & activateCells separately.
        if PANDA_VIS_BAKE_DATA:
            # activateDendrites calculates active segments
            self.tm.activateDendrites(learn=True)
            # predictive cells are calculated directly from active segments
            predictiveCells = self.tm.getPredictiveCells()
            # activates cells in columns by TM algorithm (winners, bursting...)
            self.tm.activateCells(activeColumns, learn=True)
        else:
            self.tm.compute(activeColumns, learn=True)

        self.tm_info.addData(self.tm.getActiveCells().flatten())

        # 4.1 (optional) Predictor #TODO optional
        #TODO optional: also return an error metric on predictions (RMSE, R2,...)

        # 4.2 Anomaly
        # handle spatial, contextual (raw, likelihood) anomalies
        # -Spatial
        spatialAnomaly = 0.0  #TODO optional: make this computed in SP (and later improve)
        if self.useSpatialAnomaly:
            # Update min/max values and check if there is a spatial anomaly
            if self.minVal != self.maxVal:
                tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE
                maxExpected = self.maxVal + tolerance
                minExpected = self.minVal - tolerance
                if val > maxExpected or val < minExpected:
                    spatialAnomaly = 1.0
            if self.maxVal is None or val > self.maxVal:
                self.maxVal = val
            if self.minVal is None or val < self.minVal:
                self.minVal = val

        # -temporal (raw)
        raw = self.tm.anomaly
        temporalAnomaly = raw

        if self.useLikelihood:
            # Compute log(anomaly likelihood)
            like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
            logScore = self.anomalyLikelihood.computeLogLikelihood(like)
            temporalAnomaly = logScore
            #TODO optional: TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood

        anomalyScore = max(spatialAnomaly, temporalAnomaly)  # this is the "main" anomaly, compared in NAB

        # 5. print stats
        if self.verbose and self.iteration_ % 1000 == 0:
            # print(self.enc_info)
            # print(self.sp_info)
            # print(self.tm_info)
            pass

        # 6. panda vis
        if PANDA_VIS_BAKE_DATA:
            # ------------------HTMpandaVis----------------------
            # see more about this structure at
            # https://github.com/htm-community/HTMpandaVis/blob/master/pandaBaker/README.md
            # fill up values
            pandaBaker.inputs["Value"].stringValue = "value: {:.2f}".format(val)
            pandaBaker.inputs["Value"].bits = valueBits.sparse

            pandaBaker.inputs["TimeOfDay"].stringValue = str(ts)
            pandaBaker.inputs["TimeOfDay"].bits = dateBits.sparse

            pandaBaker.layers["Layer1"].activeColumns = activeColumns.sparse
            pandaBaker.layers["Layer1"].winnerCells = self.tm.getWinnerCells().sparse
            pandaBaker.layers["Layer1"].predictiveCells = predictiveCells.sparse
            pandaBaker.layers["Layer1"].activeCells = self.tm.getActiveCells().sparse

            # customizable datastreams to be shown on the DASH PLOTS
            pandaBaker.dataStreams["rawAnomaly"].value = temporalAnomaly
            pandaBaker.dataStreams["value"].value = val
            pandaBaker.dataStreams["numberOfWinnerCells"].value = len(self.tm.getWinnerCells().sparse)
            pandaBaker.dataStreams["numberOfPredictiveCells"].value = len(predictiveCells.sparse)
            pandaBaker.dataStreams["valueInput_sparsity"].value = valueBits.getSparsity()
            pandaBaker.dataStreams["dateInput_sparsity"].value = dateBits.getSparsity()

            pandaBaker.dataStreams["Layer1_SP_overlap_metric"].value = self.sp_info.overlap.overlap
            pandaBaker.dataStreams["Layer1_TM_overlap_metric"].value = self.sp_info.overlap.overlap
            pandaBaker.dataStreams["Layer1_SP_activation_frequency"].value = self.sp_info.activationFrequency.mean()
            pandaBaker.dataStreams["Layer1_TM_activation_frequency"].value = self.tm_info.activationFrequency.mean()
            pandaBaker.dataStreams["Layer1_SP_entropy"].value = self.sp_info.activationFrequency.mean()
            pandaBaker.dataStreams["Layer1_TM_entropy"].value = self.tm_info.activationFrequency.mean()

            pandaBaker.StoreIteration(self.iteration_ - 1)
            print("ITERATION: " + str(self.iteration_ - 1))
            # ------------------HTMpandaVis----------------------

        return (anomalyScore, raw)

    # with this method, the structure for visualization is defined
    def BuildPandaSystem(self, sp, tm, consumptionBits_size, dateBits_size):
        # we have two inputs connected to proximal synapses of Layer1
        pandaBaker.inputs["Value"] = cInput(consumptionBits_size)
        pandaBaker.inputs["TimeOfDay"] = cInput(dateBits_size)

        pandaBaker.layers["Layer1"] = cLayer(sp, tm)  # Layer1 has Spatial Pooler & Temporal Memory
        pandaBaker.layers["Layer1"].proximalInputs = [
            "Value",
            "TimeOfDay",
        ]
        pandaBaker.layers["Layer1"].distalInputs = ["Layer1"]

        # data for dash plots
        streams = ["rawAnomaly", "value", "numberOfWinnerCells",
                   "numberOfPredictiveCells", "valueInput_sparsity",
                   "dateInput_sparsity", "Layer1_SP_overlap_metric",
                   "Layer1_TM_overlap_metric", "Layer1_SP_activation_frequency",
                   "Layer1_TM_activation_frequency", "Layer1_SP_entropy",
                   "Layer1_TM_entropy"]

        # create dicts for more comfortable code
        # could also be written like: pandaBaker.dataStreams["myStreamName"] = cDataStream()
        pandaBaker.dataStreams = dict((name, cDataStream()) for name in streams)

        pandaBaker.PrepareDatabase()
class HTMCoreDetector(object):
    def __init__(self, inputMin, inputMax, probationaryPeriod, *args, **kwargs):
        self.inputMin = inputMin
        self.inputMax = inputMax
        self.probationaryPeriod = probationaryPeriod

        ## API for controlling settings of htm.core HTM detector:

        # Set this to False if you want to get results based on raw scores
        # without using AnomalyLikelihood.  This will give worse results, but
        # is useful for checking the efficacy of AnomalyLikelihood.  You will
        # need to re-optimize the thresholds when running with this setting.
        self.useLikelihood = True
        self.verbose = False

        ## internal members
        # (listed here for easier understanding)
        # initialized in `initialize()`
        self.encTimestamp = None
        self.encValue = None
        self.sp = None
        self.tm = None
        self.anLike = None
        # optional debug info
        self.enc_info = None
        self.sp_info = None
        self.tm_info = None
        # internal helper variables:
        self.inputs_ = []
        self.iteration_ = 0

    def handleRecord(self, ts, val):
        """Returns a tuple (anomalyScore, rawScore).

        @param ts Timestamp
        @param val float

        @return tuple (anomalyScore, rawScore)
        """
        # Send it to the Numenta detector and get back the results
        return self.modelRun(ts, val)

    def initialize(self):
        # toggle parameters here
        # parameters = default_parameters
        parameters = parameters_numenta_comparable

        ## setup Enc, SP, TM, Likelihood
        # Make the Encoders.  These will convert input data into binary representations.
        self.encTimestamp = DateEncoder(
            timeOfDay=parameters["enc"]["time"]["timeOfDay"],
            weekend=parameters["enc"]["time"]["weekend"],
            season=parameters["enc"]["time"]["season"],
            dayOfWeek=parameters["enc"]["time"]["dayOfWeek"])

        scalarEncoderParams = EncParameters()
        scalarEncoderParams.size = parameters["enc"]["value"]["size"]
        scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
        scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]

        self.encValue = Encoder(scalarEncoderParams)
        encodingWidth = (self.encTimestamp.size + self.encValue.size)
        self.enc_info = Metrics([encodingWidth], 999999999)

        # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
        # SpatialPooler
        spParams = parameters["sp"]
        self.sp = SpatialPooler(
            inputDimensions=(encodingWidth,),
            columnDimensions=(spParams["columnCount"],),
            potentialPct=spParams["potentialPct"],
            potentialRadius=spParams["potentialRadius"],
            globalInhibition=True,
            localAreaDensity=spParams["localAreaDensity"],
            stimulusThreshold=spParams["stimulusThreshold"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=True)
        self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

        # TemporalMemory
        tmParams = parameters["tm"]
        self.tm = TemporalMemory(
            columnDimensions=(spParams["columnCount"],),
            cellsPerColumn=tmParams["cellsPerColumn"],
            activationThreshold=tmParams["activationThreshold"],
            initialPermanence=tmParams["initialPerm"],
            connectedPermanence=spParams["synPermConnected"],
            minThreshold=tmParams["minThreshold"],
            maxNewSynapseCount=tmParams["newSynapseCount"],
            permanenceIncrement=tmParams["permanenceInc"],
            permanenceDecrement=tmParams["permanenceDec"],
            predictedSegmentDecrement=0.0,
            maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
            maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
        self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

        # setup likelihood; these settings are used in NAB
        if self.useLikelihood:
            anParams = parameters["anomaly"]["likelihood"]
            learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
            self.anomalyLikelihood = AnomalyLikelihood(
                learningPeriod=learningPeriod,
                estimationSamples=self.probationaryPeriod - learningPeriod,
                reestimationPeriod=anParams["reestimationPeriod"])

        # Predictor
        # self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
        # predictor_resolution = 1

    def modelRun(self, ts, val):
        """
        Run a single pass through the HTM model.

        @param ts - Timestamp
        @param val - float input value

        @return tuple (anomalyScore, rawScore) computed for `val` at this step
        """
        ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
        self.inputs_.append(val)
        self.iteration_ += 1

        # 1. Encoding
        # Call the encoders to create bit representations for each value.  These are SDR objects.
        dateBits = self.encTimestamp.encode(ts)
        valueBits = self.encValue.encode(float(val))
        # Concatenate all these encodings into one large encoding for Spatial Pooling.
        encoding = SDR(self.encTimestamp.size + self.encValue.size).concatenate([valueBits, dateBits])
        self.enc_info.addData(encoding)

        # 2. Spatial Pooler
        # Create an SDR to represent active columns.  This will be populated by the
        # compute method below.  It must have the same dimensions as the Spatial Pooler.
        activeColumns = SDR(self.sp.getColumnDimensions())
        # Execute Spatial Pooling algorithm over input space.
        self.sp.compute(encoding, True, activeColumns)
        self.sp_info.addData(activeColumns)

        # 3. Temporal Memory
        # Execute Temporal Memory algorithm over active mini-columns.
        self.tm.compute(activeColumns, learn=True)
        self.tm_info.addData(self.tm.getActiveCells().flatten())

        # 4.1 (optional) Predictor #TODO optional
        # TODO optional: also return an error metric on predictions (RMSE, R2,...)

        # 4.2 Anomaly
        # handle contextual (raw, likelihood) anomalies
        # -temporal (raw)
        raw = self.tm.anomaly
        temporalAnomaly = raw

        if self.useLikelihood:
            # Compute log(anomaly likelihood)
            like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
            logScore = self.anomalyLikelihood.computeLogLikelihood(like)
            temporalAnomaly = logScore
            # TODO optional: TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood

        anomalyScore = temporalAnomaly  # this is the "main" anomaly, compared in NAB

        # 5. print stats
        if self.verbose and self.iteration_ % 1000 == 0:
            print(self.enc_info)
            print(self.sp_info)
            print(self.tm_info)

        return anomalyScore, raw
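# Minimal usage sketch for HTMCoreDetector (not part of the original source;
# `stream` stands for any iterable of (datetime, float) pairs and the min/max
# bounds are illustrative):
detector = HTMCoreDetector(inputMin=0.0, inputMax=100.0, probationaryPeriod=150)
detector.initialize()
for ts, val in stream:
    anomalyScore, raw = detector.handleRecord(ts, val)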
def building_htm(len_data):
    global enc_info, sp_info, tm_info, anomaly_history
    global predictor, predictor_resolution
    global tm, sp, scalarEncoder, encodingWidth, dateEncoder

    # Initial message
    print("Building HTM for predicting trends...")

    # Default parameters in HTM
    default_parameters = {
        # There are 2 (3) encoders: "value" (RDSE) & "time" (DateTime weekend, timeOfDay)
        'enc': {
            "value": {'resolution': 0.88, 'size': 700, 'sparsity': 0.02},
            "time": {'timeOfDay': (30, 1)}  # , 'weekend': 21}
        },
        'predictor': {'sdrc_alpha': 0.1},
        'sp': {
            'boostStrength': 3.0,
            'columnCount': 1638,
            'localAreaDensity': 0.04395604395604396,
            'potentialPct': 0.85,
            'synPermActiveInc': 0.04,
            'synPermConnected': 0.13999999999999999,
            'synPermInactiveDec': 0.006
        },
        'tm': {
            'activationThreshold': 17,
            'cellsPerColumn': 13,
            'initialPerm': 0.21,
            'maxSegmentsPerCell': 128,
            'maxSynapsesPerSegment': 64,
            'minThreshold': 10,
            'newSynapseCount': 32,
            'permanenceDec': 0.1,
            'permanenceInc': 0.1
        },
        'anomaly': {
            'likelihood': {'probationaryPct': 0.1, 'reestimationPeriod': 100}
        }
    }

    # Make the encoder
    print("- Make the encoder")
    dateEncoder = DateEncoder(timeOfDay=default_parameters["enc"]["time"]["timeOfDay"])

    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = default_parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = default_parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = default_parameters["enc"]["value"]["resolution"]

    scalarEncoder = RDSE(scalarEncoderParams)
    encodingWidth = (dateEncoder.size + scalarEncoder.size)
    enc_info = Metrics([encodingWidth], 999999999)

    # Make the SP
    print("- Make the SP")
    spParams = default_parameters["sp"]
    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True)
    sp_info = Metrics(sp.getColumnDimensions(), 999999999)

    # Temporal Memory Parameters
    print("- Make the TM")
    tmParams = default_parameters["tm"]
    tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
    tm_info = Metrics([tm.numberOfCells()], 999999999)

    # Setup Likelihood
    print("- Make Anomaly Score/Likelihood")
    anParams = default_parameters["anomaly"]["likelihood"]
    probationaryPeriod = int(math.floor(float(anParams["probationaryPct"]) * len_data))
    learningPeriod = int(math.floor(probationaryPeriod / 2.0))
    anomaly_history = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=probationaryPeriod - learningPeriod,
        reestimationPeriod=anParams["reestimationPeriod"])

    # Make predictor
    print("- Make the predictor")
    predictor = Predictor(steps=[1, 5], alpha=default_parameters["predictor"]['sdrc_alpha'])
    predictor_resolution = 1

    # End message
    print("- Finish the building of HTM!")
class Layer:
    def __init__(self, din=(10, 10), dout=(10, 10), temporal=True,
                 setting=param.default_parameters):
        self.input_shape = din
        self.output_shape = dout
        self.temporal = temporal
        self.learn = True
        self.setting = AttrDict(setting)
        self.sp = SpatialPooler()
        self.tm = TemporalMemory() if temporal else None

    def compile(self):
        spParams = self.setting("sp")
        self.sp = SpatialPooler(
            inputDimensions=self.input_shape,
            columnDimensions=self.output_shape,
            potentialPct=spParams.potentialPct,
            potentialRadius=spParams.potentialRadius,
            globalInhibition=len(self.output_shape) == 1,
            localAreaDensity=spParams.localAreaDensity,
            synPermInactiveDec=spParams.synPermInactiveDec,
            synPermActiveInc=spParams.synPermActiveInc,
            synPermConnected=spParams.synPermConnected,
            boostStrength=spParams.boostStrength,
            wrapAround=True,
        )
        if self.temporal:
            tmParams = self.setting("tm")
            self.tm = TemporalMemory(
                columnDimensions=self.output_shape,
                cellsPerColumn=tmParams.cellsPerColumn,
                activationThreshold=tmParams.activationThreshold,
                initialPermanence=tmParams.initialPerm,
                connectedPermanence=spParams.synPermConnected,
                minThreshold=tmParams.minThreshold,
                maxNewSynapseCount=tmParams.newSynapseCount,
                permanenceIncrement=tmParams.permanenceInc,
                permanenceDecrement=tmParams.permanenceDec,
                predictedSegmentDecrement=0.0,
                maxSegmentsPerCell=tmParams.maxSegmentsPerCell,
                maxSynapsesPerSegment=tmParams.maxSynapsesPerSegment)

    def forward(self, encoding):
        activeColumns = SDR(self.sp.getColumnDimensions())
        self.sp.compute(encoding, self.learn, activeColumns)

        predictedColumns = None
        if self.temporal:
            self.tm.compute(activeColumns, self.learn)
            self.tm.activateDendrites(self.learn)
            predictedColumnIndices = {self.tm.columnForCell(i)
                                      for i in self.tm.getPredictiveCells().sparse}
            predictedColumns = SDR(self.sp.getColumnDimensions())
            predictedColumns.sparse = list(predictedColumnIndices)

        return activeColumns, predictedColumns

    def train(self):
        self.learn = True

    def eval(self):
        self.learn = False

    def anomaly(self):
        return float(self.tm.anomaly) if self.temporal else None

    def reset(self):
        if self.temporal:
            self.tm.reset()

    def save(self, path):
        print('Saving Model...')
        print(str(self.sp))
        self.sp.saveToFile(param.sp_model.format(path))
        if self.temporal:
            print(str(self.tm))
            self.tm.saveToFile(param.tm_model.format(path))

    def load(self, path):
        print('Loading Model...')
        self.sp.loadFromFile(param.sp_model.format(path))
        print(str(self.sp))
        if self.temporal:
            self.tm.loadFromFile(param.tm_model.format(path))
            print(str(self.tm))
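# Minimal usage sketch for Layer (not part of the original source).  The RDSE
# encoder and its parameters are assumptions; any SDR matching `din` would do.
from htm.bindings.encoders import RDSE, RDSE_Parameters

layer = Layer(din=(1000,), dout=(2048,))
layer.compile()

p = RDSE_Parameters()
p.size = 1000
p.sparsity = 0.02
p.resolution = 0.1
encoder = RDSE(p)

for value in (1.0, 2.0, 3.0):
    active, predicted = layer.forward(encoder.encode(value))
    print(value, layer.anomaly())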
class MultivHTMDetector(object):
    """
    This detector uses an HTM based anomaly detection technique.
    """

    def __init__(self, name, probationaryPeriod, params=None, verbose=False):
        self.verbose = verbose
        self.name = name  # for logging
        self.probationaryPeriod = probationaryPeriod

        if params is not None:
            self.parameters = params
        else:
            self.parameters = parameters_best

        self.encTimestamp = None
        self.scalar_encoders = []
        self.enc_width = None
        self.sp = None
        self.tm = None
        self.anomalyLikelihood = None

        # for initialization
        self.init_data = []
        self.init_min_max = {}
        self.is_initialized = False
        self.iteration_ = 0
        self.learningPeriod = None

    def _createRDSE(self, min_val=0, max_val=0):
        scalarEncoderParams = RDSE_Parameters()
        scalarEncoderParams.size = self.parameters["enc"]["value"]["size"]
        scalarEncoderParams.activeBits = self.parameters["enc"]["value"]["activeBits"]
        scalarEncoderParams.resolution = max(0.001, (max_val - min_val) / 130)
        scalarEncoderParams.seed = self.parameters["enc"]["value"]["seed"]
        return RDSE(scalarEncoderParams)

    def initialize(self):
        # Setup Encoders
        self.encTimestamp = DateEncoder(timeOfDay=self.parameters["enc"]["time"]["timeOfDay"])
        for idx, (key, value) in enumerate(self.init_min_max.items()):
            self.scalar_encoders.append({
                'idx': idx,
                'name': key,
                'encoder': self._createRDSE(min_val=value['min'], max_val=value['max'])
            })
        self.enc_width = self.encTimestamp.size + \
            sum(enc_info.get('encoder').size for enc_info in self.scalar_encoders)

        # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
        # SpatialPooler
        spParams = self.parameters["sp"]
        self.sp = SpatialPooler(
            inputDimensions=(self.enc_width,),
            columnDimensions=(spParams["columnDimensions"],),
            potentialRadius=self.enc_width,
            potentialPct=spParams["potentialPct"],
            globalInhibition=spParams["globalInhibition"],
            localAreaDensity=spParams["localAreaDensity"],
            numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
            stimulusThreshold=spParams["stimulusThreshold"],
            synPermInactiveDec=spParams["synPermInactiveDec"],
            synPermActiveInc=spParams["synPermActiveInc"],
            synPermConnected=spParams["synPermConnected"],
            boostStrength=spParams["boostStrength"],
            wrapAround=spParams["wrapAround"],
            minPctOverlapDutyCycle=spParams["minPctOverlapDutyCycle"],
            dutyCyclePeriod=spParams["dutyCyclePeriod"],
            seed=spParams["seed"],
        )

        # TemporalMemory
        tmParams = self.parameters["tm"]
        self.tm = TemporalMemory(
            columnDimensions=(spParams["columnDimensions"],),
            cellsPerColumn=tmParams["cellsPerColumn"],
            activationThreshold=tmParams["activationThreshold"],
            initialPermanence=tmParams["initialPermanence"],
            connectedPermanence=tmParams["connectedPermanence"],
            minThreshold=tmParams["minThreshold"],
            maxNewSynapseCount=tmParams["maxNewSynapseCount"],
            permanenceIncrement=tmParams["permanenceIncrement"],
            permanenceDecrement=tmParams["permanenceDecrement"],
            predictedSegmentDecrement=tmParams["predictedSegmentDecrement"],
            maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
            maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
            seed=tmParams["seed"],
        )

        anParams = self.parameters["anomaly"]["likelihood"]
        self.learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
        self.anomalyLikelihood = AnomalyLikelihood(
            learningPeriod=self.learningPeriod,
            estimationSamples=self.probationaryPeriod - self.learningPeriod,
            reestimationPeriod=anParams["reestimationPeriod"])

    def modelRun(self, ts, data):
        """
        Run a single pass through the HTM model.

        @param ts - Timestamp
        @param data - dict mapping column name to float input value

        @return tuple (anomalyScore, rawScore) computed for `data` at this step
        """
        self.iteration_ += 1

        # 0. During the probation period, gather the data and return 0.01.
        if self.iteration_ <= self.probationaryPeriod:
            self.init_data.append((ts, data))
            for col_name, val in data.items():
                if val is None:
                    continue
                if col_name not in self.init_min_max:
                    self.init_min_max[col_name] = {}
                if 'min' not in self.init_min_max[col_name] or val < self.init_min_max[col_name]['min']:
                    self.init_min_max[col_name]['min'] = val
                if 'max' not in self.init_min_max[col_name] or val > self.init_min_max[col_name]['max']:
                    self.init_min_max[col_name]['max'] = val
            return 0.01, 0.01

        if self.is_initialized is False:
            if self.verbose:
                print("[{}] Initializing".format(self.name))
            temp_iteration = self.iteration_
            self.initialize()
            self.is_initialized = True
            for ts, data in self.init_data:
                self.modelRun(ts, data)
            self.iteration_ = temp_iteration
            if self.verbose:
                print("[{}] Initialization done".format(self.name))

        # run data through model pipeline: enc -> SP -> TM -> Anomaly
        # 1. Encoding
        # Call the encoders to create bit representations for each value.  These are SDR objects.
        dateBits = self.encTimestamp.encode(ts)
        scalarBits = []
        for enc_info in sorted(self.scalar_encoders, key=lambda i: i.get('idx')):
            name = enc_info.get('name')
            encoder = enc_info.get('encoder')
            val = data.get(name)
            if val is None:
                raise Exception('Value for {} is None. Aborting.'.format(name))
            scalarBits.append(encoder.encode(float(val)))
        encoding = SDR(self.enc_width).concatenate([dateBits] + scalarBits)

        # 2. Spatial Pooler
        # Create an SDR to represent active columns.  This will be populated by the
        # compute method below.  It must have the same dimensions as the Spatial Pooler.
        activeColumns = SDR(self.sp.getColumnDimensions())
        # Execute Spatial Pooling algorithm over input space.
        self.sp.compute(encoding, True, activeColumns)

        # 3. Temporal Memory
        # Execute Temporal Memory algorithm over active mini-columns.
        self.tm.compute(activeColumns, learn=True)

        # 4. Anomaly
        raw = self.tm.anomaly
        like = self.anomalyLikelihood.anomalyProbability(data, raw, ts)
        logScore = self.anomalyLikelihood.computeLogLikelihood(like)
        anomalyScore = logScore

        return anomalyScore, raw
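# Minimal usage sketch for MultivHTMDetector (not part of the original source;
# the column names and values are illustrative):
import datetime

detector = MultivHTMDetector("demo", probationaryPeriod=100)
ts = datetime.datetime(2020, 1, 1)
for i in range(400):
    row = {"cpu": float(i % 10), "mem": float((i * 3) % 7)}
    anomalyScore, raw = detector.modelRun(ts, row)  # (0.01, 0.01) during probation
    ts += datetime.timedelta(minutes=5)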
# NOTE: the head of this snippet was truncated; the first lines of the
# SpatialPooler call are reconstructed to match the defaults used in
# building_htm above (columnCount=1638, potentialPct=0.85).
sp = SpatialPooler(
    inputDimensions=(encodingWidth,),
    columnDimensions=(1638,),
    potentialPct=0.85,
    potentialRadius=encodingWidth,
    globalInhibition=True,
    localAreaDensity=0.04395604395604396,
    synPermInactiveDec=0.006,
    synPermActiveInc=0.04,
    synPermConnected=0.13999999999999999,
    boostStrength=3.0,
    wrapAround=True)

tm = TemporalMemory(
    columnDimensions=(1638,),  # sp.columnDimensions
    cellsPerColumn=13,
    activationThreshold=17,
    initialPermanence=0.21,
    connectedPermanence=0.13999999999999999,  # sp.synPermConnected
    minThreshold=10,
    maxNewSynapseCount=32,
    permanenceIncrement=0.1,
    permanenceDecrement=0.1,
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=128,
    maxSynapsesPerSegment=64)

records = 1200
probationaryPeriod = int(math.floor(float(0.1) * records))
learningPeriod = int(math.floor(probationaryPeriod / 2.0))
anomaly_history = AnomalyLikelihood(
    learningPeriod=learningPeriod,
    estimationSamples=probationaryPeriod - learningPeriod,
    reestimationPeriod=100)
def main(parameters=default_parameters, argv=None, verbose=True):
    if verbose:
        import pprint
        print("Parameters:")
        pprint.pprint(parameters, indent=4)
        print("")

    # Read the input file.
    records = []
    with open(_INPUT_FILE_PATH, "r") as fin:
        reader = csv.reader(fin)
        headers = next(reader)
        next(reader)
        next(reader)
        for record in reader:
            records.append(record)

    # Make the Encoders.  These will convert input data into binary representations.
    dateEncoder = DateEncoder(timeOfDay=parameters["enc"]["time"]["timeOfDay"],
                              weekend=parameters["enc"]["time"]["weekend"])

    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
    scalarEncoder = RDSE(scalarEncoderParams)
    encodingWidth = (dateEncoder.size + scalarEncoder.size)
    enc_info = Metrics([encodingWidth], 999999999)

    # Make the HTM.  SpatialPooler & TemporalMemory & associated tools.
    spParams = parameters["sp"]
    sp = SpatialPooler(
        inputDimensions=(encodingWidth,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=encodingWidth,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True)
    sp_info = Metrics(sp.getColumnDimensions(), 999999999)

    tmParams = parameters["tm"]
    tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
    tm_info = Metrics([tm.numberOfCells()], 999999999)

    # setup likelihood; these settings are used in NAB
    anParams = parameters["anomaly"]["likelihood"]
    probationaryPeriod = int(math.floor(float(anParams["probationaryPct"]) * len(records)))
    learningPeriod = int(math.floor(probationaryPeriod / 2.0))
    anomaly_history = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=probationaryPeriod - learningPeriod,
        reestimationPeriod=anParams["reestimationPeriod"])

    predictor = Predictor(steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'])
    predictor_resolution = 1

    # Iterate through every datum in the dataset, record the inputs & outputs.
    inputs = []
    anomaly = []
    anomalyProb = []
    predictions = {1: [], 5: []}
    for count, record in enumerate(records):
        # Convert date string into a Python datetime object.
        dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
        # Convert data value string into float.
        consumption = float(record[1])
        inputs.append(consumption)

        # Call the encoders to create bit representations for each value.  These are SDR objects.
        dateBits = dateEncoder.encode(dateString)
        consumptionBits = scalarEncoder.encode(consumption)

        # Concatenate all these encodings into one large encoding for Spatial Pooling.
        encoding = SDR(encodingWidth).concatenate([consumptionBits, dateBits])
        enc_info.addData(encoding)

        # Create an SDR to represent active columns.  This will be populated by the
        # compute method below.  It must have the same dimensions as the Spatial Pooler.
        activeColumns = SDR(sp.getColumnDimensions())

        # Execute Spatial Pooling algorithm over input space.
        sp.compute(encoding, True, activeColumns)
        sp_info.addData(activeColumns)

        # Execute Temporal Memory algorithm over active mini-columns.
        tm.compute(activeColumns, learn=True)
        tm_info.addData(tm.getActiveCells().flatten())

        # Predict what will happen, and then train the predictor based on what just happened.
        pdf = predictor.infer(count, tm.getActiveCells())
        for n in (1, 5):
            if pdf[n]:
                predictions[n].append(np.argmax(pdf[n]) * predictor_resolution)
            else:
                predictions[n].append(float('nan'))
        predictor.learn(count, tm.getActiveCells(), int(consumption / predictor_resolution))

        anomalyLikelihood = anomaly_history.anomalyProbability(consumption, tm.anomaly)
        anomaly.append(tm.anomaly)
        anomalyProb.append(anomalyLikelihood)

    # Print information & statistics about the state of the HTM.
    print("Encoded Input", enc_info)
    print("")
    print("Spatial Pooler Mini-Columns", sp_info)
    print(str(sp))
    print("")
    print("Temporal Memory Cells", tm_info)
    print(str(tm))
    print("")

    # Shift the predictions so that they are aligned with the input they predict.
    for n_steps, pred_list in predictions.items():
        for x in range(n_steps):
            pred_list.insert(0, float('nan'))
            pred_list.pop()

    # Calculate the predictive accuracy, Root-Mean-Squared
    accuracy = {1: 0, 5: 0}
    accuracy_samples = {1: 0, 5: 0}
    for idx, inp in enumerate(inputs):
        for n in predictions:  # For each [N]umber of time steps ahead which was predicted.
            val = predictions[n][idx]
            if not math.isnan(val):
                accuracy[n] += (inp - val) ** 2
                accuracy_samples[n] += 1
    for n in sorted(predictions):
        accuracy[n] = (accuracy[n] / accuracy_samples[n]) ** .5
        print("Predictive Error (RMS)", n, "steps ahead:", accuracy[n])

    # Show info about the anomaly (mean & std)
    print("Anomaly Mean", np.mean(anomaly))
    print("Anomaly Std ", np.std(anomaly))

    # Plot the Predictions and Anomalies.
    if verbose:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            print("WARNING: failed to import matplotlib, plots cannot be shown.")
            return -accuracy[5]

        plt.subplot(2, 1, 1)
        plt.title("Predictions")
        plt.xlabel("Time")
        plt.ylabel("Power Consumption")
        plt.plot(np.arange(len(inputs)), inputs, 'red',
                 np.arange(len(inputs)), predictions[1], 'blue',
                 np.arange(len(inputs)), predictions[5], 'green')
        plt.legend(labels=('Input', '1 Step Prediction, Shifted 1 step',
                           '5 Step Prediction, Shifted 5 steps'))

        plt.subplot(2, 1, 2)
        plt.title("Anomaly Score")
        plt.xlabel("Time")
        plt.ylabel("Power Consumption")
        inputs = np.array(inputs) / max(inputs)
        plt.plot(np.arange(len(inputs)), inputs, 'red',
                 np.arange(len(inputs)), anomaly, 'blue')
        plt.legend(labels=('Input', 'Anomaly Score'))
        plt.show()

    return -accuracy[5]
def SystemSetup(parameters, verbose=True):
    global agent, sensorEncoder, env, sensorLayer_sp, sensorLayer_SDR_columns
    global gridCellEncoder, locationlayer_SDR_cells
    global sensorLayer_tm

    if verbose:
        import pprint
        print("Parameters:")
        pprint.pprint(parameters, indent=4)
        print("")

    # create environment and the agent
    env = htm2d.environment.TwoDimensionalEnvironment(20, 20)
    agent = htm2d.agent.Agent()

    # load object from yml file
    with open(os.path.join(_OBJECTS_DIR, OBJECT_FILENAME), "r") as stream:
        try:
            env.load_object(stream)
        except yaml.YAMLError as exc:
            print(exc)

    # SENSOR LAYER --------------------------------------------------------------
    # setup sensor encoder
    sensorEncoderParams = RDSE_Parameters()
    sensorEncoderParams.category = True
    sensorEncoderParams.size = parameters["enc"]["size"]
    sensorEncoderParams.sparsity = parameters["enc"]["sparsity"]
    sensorEncoderParams.seed = parameters["enc"]["seed"]
    sensorEncoder = RDSE(sensorEncoderParams)

    # Create SpatialPooler
    spParams = parameters["sensorLayer_sp"]
    sensorLayer_sp = SpatialPooler(
        inputDimensions=(sensorEncoder.size,),
        columnDimensions=(spParams["columnCount"],),
        potentialPct=spParams["potentialPct"],
        potentialRadius=sensorEncoder.size,
        globalInhibition=True,
        localAreaDensity=spParams["localAreaDensity"],
        synPermInactiveDec=spParams["synPermInactiveDec"],
        synPermActiveInc=spParams["synPermActiveInc"],
        synPermConnected=spParams["synPermConnected"],
        boostStrength=spParams["boostStrength"],
        wrapAround=True,
    )
    sp_info = Metrics(sensorLayer_sp.getColumnDimensions(), 999999999)

    # Create an SDR to represent active columns.  This will be populated by the
    # compute method below.  It must have the same dimensions as the Spatial Pooler.
    sensorLayer_SDR_columns = SDR(spParams["columnCount"])

    # LOCATION LAYER ------------------------------------------------------------
    # Grid cell modules
    locParams = parameters["locationLayer"]
    gridCellEncoder = GridCellEncoder(
        size=locParams["cellCount"],
        sparsity=locParams["sparsity"],
        periods=locParams["periods"],
        seed=locParams["seed"],
    )
    locationlayer_SDR_cells = SDR(gridCellEncoder.dimensions)

    tmParams = parameters["sensorLayer_tm"]
    sensorLayer_tm = TemporalMemory(
        columnDimensions=(spParams["columnCount"],),
        cellsPerColumn=tmParams["cellsPerColumn"],
        activationThreshold=tmParams["activationThreshold"],
        initialPermanence=tmParams["initialPerm"],
        connectedPermanence=spParams["synPermConnected"],
        minThreshold=tmParams["minThreshold"],
        maxNewSynapseCount=tmParams["newSynapseCount"],
        permanenceIncrement=tmParams["permanenceInc"],
        permanenceDecrement=tmParams["permanenceDec"],
        predictedSegmentDecrement=0.0,
        maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
        maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
        externalPredictiveInputs=locParams["cellCount"],
    )
    tm_info = Metrics([sensorLayer_tm.numberOfCells()], 999999999)
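# A hypothetical single sensing step for the two-layer setup above (not part of
# the original source; `feature_value` and `position` are assumed inputs).  The
# location layer's grid cells serve as external predictive input for the sensor
# layer's TemporalMemory:
def sense_step(feature_value, position):
    sensed = sensorEncoder.encode(feature_value)
    sensorLayer_sp.compute(sensed, True, sensorLayer_SDR_columns)
    gridCellEncoder.encode(position, locationlayer_SDR_cells)
    sensorLayer_tm.activateDendrites(
        learn=True,
        externalPredictiveInputsActive=locationlayer_SDR_cells,
        externalPredictiveInputsWinners=locationlayer_SDR_cells)
    sensorLayer_tm.activateCells(sensorLayer_SDR_columns, learn=True)
    return sensorLayer_tm.anomaly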