class UnivHTMDetector(object):
  """
  This detector uses an HTM based anomaly detection technique.
  """

  def __init__(self, name, probationaryPeriod, smoothingKernelSize, htmParams=None, verbose=False):
    self.useSpatialAnomaly = True
    self.verbose = verbose
    self.name = name  # for logging
    self.probationaryPeriod = probationaryPeriod
    self.parameters = parameters_best

    self.minVal = None
    self.maxVal = None
    self.spatial_tolerance = None

    self.encTimestamp = None
    self.encValue = None
    self.sp = None
    self.tm = None
    self.anomalyLikelihood = None

    # optional debug info
    self.enc_info = None
    self.sp_info = None
    self.tm_info = None

    # for initialization
    self.init_data = []
    self.is_initialized = False
    self.iteration_ = 0

    # for smoothing with gaussian
    self.historic_raw_anomaly_scores = deque(maxlen=smoothingKernelSize)
    self.kernel = None
    self.learningPeriod = None

  def initialize(self, input_min=0, input_max=0):
    # setup spatial anomaly
    if self.useSpatialAnomaly:
      self.spatial_tolerance = self.parameters["spatial_tolerance"]

    ## setup Enc, SP, TM
    # Make the Encoders. These will convert input data into binary representations.
    self.encTimestamp = DateEncoder(timeOfDay=self.parameters["enc"]["time"]["timeOfDay"])

    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = self.parameters["enc"]["value"]["size"]
    scalarEncoderParams.activeBits = self.parameters["enc"]["value"]["activeBits"]
    scalarEncoderParams.resolution = max(0.001, (input_max - input_min) / 130)
    scalarEncoderParams.seed = self.parameters["enc"]["value"]["seed"]

    self.encValue = RDSE(scalarEncoderParams)
    encodingWidth = (self.encTimestamp.size + self.encValue.size)
    self.enc_info = Metrics([encodingWidth], 999999999)

    # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
    # SpatialPooler
    spParams = self.parameters["sp"]
    self.sp = SpatialPooler(
      inputDimensions=(encodingWidth,),
      columnDimensions=(spParams["columnDimensions"],),
      potentialRadius=encodingWidth,
      potentialPct=spParams["potentialPct"],
      globalInhibition=spParams["globalInhibition"],
      localAreaDensity=spParams["localAreaDensity"],
      numActiveColumnsPerInhArea=spParams["numActiveColumnsPerInhArea"],
      stimulusThreshold=spParams["stimulusThreshold"],
      synPermInactiveDec=spParams["synPermInactiveDec"],
      synPermActiveInc=spParams["synPermActiveInc"],
      synPermConnected=spParams["synPermConnected"],
      boostStrength=spParams["boostStrength"],
      wrapAround=spParams["wrapAround"],
      minPctOverlapDutyCycle=spParams["minPctOverlapDutyCycle"],
      dutyCyclePeriod=spParams["dutyCyclePeriod"],
      seed=spParams["seed"],
    )
    self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

    # TemporalMemory
    tmParams = self.parameters["tm"]
    self.tm = TemporalMemory(
      columnDimensions=(spParams["columnDimensions"],),
      cellsPerColumn=tmParams["cellsPerColumn"],
      activationThreshold=tmParams["activationThreshold"],
      initialPermanence=tmParams["initialPermanence"],
      connectedPermanence=tmParams["connectedPermanence"],
      minThreshold=tmParams["minThreshold"],
      maxNewSynapseCount=tmParams["maxNewSynapseCount"],
      permanenceIncrement=tmParams["permanenceIncrement"],
      permanenceDecrement=tmParams["permanenceDecrement"],
      predictedSegmentDecrement=tmParams["predictedSegmentDecrement"],
      maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
      maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"],
      seed=tmParams["seed"]
    )
    self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

    anParams = self.parameters["anomaly"]["likelihood"]
    self.learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
    self.anomalyLikelihood = AnomalyLikelihood(
      learningPeriod=self.learningPeriod,
      estimationSamples=self.probationaryPeriod - self.learningPeriod,
      reestimationPeriod=anParams["reestimationPeriod"])

    self.kernel = self._gauss_kernel(self.historic_raw_anomaly_scores.maxlen,
                                     self.historic_raw_anomaly_scores.maxlen)

  def modelRun(self, ts, val):
    """
    Run a single pass through the HTM model.
    @param ts  - Timestamp
    @param val - float input value
    @return rawAnomalyScore computed for the `val` in this step
    """
    self.iteration_ += 1

    # 0. During the probationary period, gather the data and return 0.01.
    if self.iteration_ <= self.probationaryPeriod:
      self.init_data.append((ts, val))
      return 0.01

    if self.is_initialized is False:
      if self.verbose:
        print("[{}] Initializing".format(self.name))
      temp_iteration = self.iteration_
      vals = [i[1] for i in self.init_data]
      self.initialize(input_min=min(vals), input_max=max(vals))
      self.is_initialized = True
      for ts, val in self.init_data:
        self.modelRun(ts, val)
      self.iteration_ = temp_iteration
      if self.verbose:
        print("[{}] Initialization done".format(self.name))

    ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
    # 1. Encoding
    # Call the encoders to create bit representations for each value. These are SDR objects.
    dateBits = self.encTimestamp.encode(ts)
    valueBits = self.encValue.encode(float(val))
    # Concatenate all these encodings into one large encoding for Spatial Pooling.
    encoding = SDR(self.encTimestamp.size + self.encValue.size).concatenate([valueBits, dateBits])
    self.enc_info.addData(encoding)

    # 2. Spatial Pooler
    # Create an SDR to represent active columns. This will be populated by the
    # compute method below. It must have the same dimensions as the Spatial Pooler.
    activeColumns = SDR(self.sp.getColumnDimensions())
    # Execute Spatial Pooling algorithm over input space.
    self.sp.compute(encoding, True, activeColumns)
    self.sp_info.addData(activeColumns)

    # 3. Temporal Memory
    # Execute Temporal Memory algorithm over active mini-columns.
    self.tm.compute(activeColumns, learn=True)
    self.tm_info.addData(self.tm.getActiveCells().flatten())

    # 4. Anomaly
    # handle spatial and contextual (raw, likelihood) anomalies
    # -Spatial
    spatialAnomaly = 0.0
    if self.useSpatialAnomaly:
      # Update min/max values and check if there is a spatial anomaly
      if self.minVal != self.maxVal:
        tolerance = (self.maxVal - self.minVal) * self.spatial_tolerance
        maxExpected = self.maxVal + tolerance
        minExpected = self.minVal - tolerance
        if val > maxExpected or val < minExpected:
          spatialAnomaly = 1.0
      if self.maxVal is None or val > self.maxVal:
        self.maxVal = val
      if self.minVal is None or val < self.minVal:
        self.minVal = val

    # -Temporal
    raw = self.tm.anomaly
    like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
    logScore = self.anomalyLikelihood.computeLogLikelihood(like)
    temporalAnomaly = logScore

    anomalyScore = max(spatialAnomaly, temporalAnomaly)  # this is the "main" anomaly, compared in NAB

    # 5. Apply smoothing
    self.historic_raw_anomaly_scores.append(anomalyScore)
    historic_scores = np.asarray(self.historic_raw_anomaly_scores)
    convolved = np.convolve(historic_scores, self.kernel, 'valid')
    anomalyScore = convolved[-1]

    return anomalyScore

  @staticmethod
  def estimateNormal(sampleData, performLowerBoundCheck=True):
    """
    :param sampleData:
    :type sampleData: Numpy array.
    :param performLowerBoundCheck:
    :type performLowerBoundCheck: bool
    :returns: A tuple ``(mean, variance, stdev)`` describing the normal
      distribution fitted to ``sampleData``.
    """
    mean = np.mean(sampleData)
    variance = np.var(sampleData)
    st_dev = 0

    if performLowerBoundCheck:
      # Handle edge case of almost no deviations and super low anomaly scores. We
      # find that such low anomaly means can happen, but then the slightest blip
      # of anomaly score can cause the likelihood to jump up to red.
      if mean < 0.03:
        mean = 0.03
      # Catch-all for super low variance to handle numerical precision issues.
      if variance < 0.0003:
        variance = 0.0003

    # Compute standard deviation
    if variance > 0:
      st_dev = math.sqrt(variance)

    return mean, variance, st_dev

  @staticmethod
  def _calcSkipRecords(numIngested, windowSize, learningPeriod):
    """Return the value of skipRecords for passing to estimateAnomalyLikelihoods.

    If `windowSize` is very large (bigger than the amount of data) then this
    could just return `learningPeriod`. But when some values have fallen out of
    the historical sliding window of anomaly records, then we have to take those
    into account as well, so we return the `learningPeriod` minus the number
    shifted out.

    :param numIngested - (int) number of data points that have been added to the
      sliding window of historical data points.
    :param windowSize - (int) size of sliding window of historical data points.
    :param learningPeriod - (int) the number of iterations required for the
      algorithm to learn the basic patterns in the dataset and for the anomaly
      score to 'settle down'.
    """
    numShiftedOut = max(0, numIngested - windowSize)
    return min(numIngested, max(0, learningPeriod - numShiftedOut))

  @staticmethod
  def _gauss_kernel(std, size):
    def _norm_pdf(x, mean, sd):
      var = float(sd) ** 2
      denom = (2 * math.pi * var) ** .5
      num = math.exp(-(float(x) - float(mean)) ** 2 / (2 * var))
      return num / denom

    kernel = [2 * _norm_pdf(idx, 0, std) for idx in list(range(-size + 1, 1))]
    kernel = np.array(kernel)
    kernel = np.flip(kernel)
    kernel = kernel / sum(kernel)
    return kernel
#
# Run the encoder and measure some statistics about its output.
#
if args.category:
  n_samples = int(args.maximum - args.minimum + 1)
else:
  n_samples = (args.maximum - args.minimum) / enc.parameters.resolution
  oversample = 2  # Use more samples than needed to avoid aliasing & artifacts.
  n_samples = int(round(oversample * n_samples))
sdrs = []
for i in np.linspace(args.minimum, args.maximum, n_samples):
  sdrs.append(enc.encode(i))

M = Metrics([enc.size], len(sdrs) + 1)
for s in sdrs:
  M.addData(s)
print("Statistics:")
print("Encoded %d inputs." % len(sdrs))
print("Output " + str(M))

#
# Plot the Receptive Field of each bit in the encoder.
#
import matplotlib.pyplot as plt
if 'matplotlib.pyplot' in modules:  # `modules` is presumably sys.modules, imported elsewhere in this script
  rf = np.zeros([enc.size, len(sdrs)], dtype=np.uint8)
  for i in range(len(sdrs)):
    rf[:, i] = sdrs[i].dense
  plt.imshow(rf, interpolation='nearest')
  plt.title("RDSE Receptive Fields")
  plt.ylabel("Cell Number")
class HtmcoreDetector(AnomalyDetector):
  """
  This detector uses an HTM based anomaly detection technique.
  """

  def __init__(self, *args, **kwargs):
    super(HtmcoreDetector, self).__init__(*args, **kwargs)

    ## API for controlling settings of htm.core HTM detector:

    # Set this to False if you want to get results based on raw scores
    # without using AnomalyLikelihood. This will give worse results, but
    # is useful for checking the efficacy of AnomalyLikelihood. You will need
    # to re-optimize the thresholds when running with this setting.
    self.useLikelihood = True
    self.useSpatialAnomaly = True
    self.verbose = True

    # Set this to true if you want to use the optimization.
    # If true, it reads the parameters from ./params.json
    # If false, it reads the parameters from ./best_params.json
    self.use_optimization = False

    ## internal members
    # (listed here for easier understanding)
    # initialized in `initialize()`
    self.encTimestamp = None
    self.encValue = None
    self.sp = None
    self.tm = None
    self.anLike = None
    # optional debug info
    self.enc_info = None
    self.sp_info = None
    self.tm_info = None
    # internal helper variables:
    self.inputs_ = []
    self.iteration_ = 0

  def getAdditionalHeaders(self):
    """Returns a list of strings."""
    return ["raw_score"]  # TODO optional: add "prediction"

  def handleRecord(self, inputData):
    """Returns a tuple (anomalyScore, rawScore).

    @param inputData is a dict {"timestamp" : Timestamp(), "value" : float}

    @return tuple (anomalyScore, <any other fields specified in `getAdditionalHeaders()`>, ...)
    """
    # Send it to Numenta detector and get back the results
    return self.modelRun(inputData["timestamp"], inputData["value"])

  def initialize(self):
    # toggle parameters here
    if self.use_optimization:
      parameters = get_params('params.json')
    else:
      parameters = parameters_numenta_comparable

    # setup spatial anomaly
    if self.useSpatialAnomaly:
      # Keep track of value range for spatial anomaly detection
      self.minVal = None
      self.maxVal = None

    ## setup Enc, SP, TM, Likelihood
    # Make the Encoders. These will convert input data into binary representations.
    self.encTimestamp = DateEncoder(timeOfDay=parameters["enc"]["time"]["timeOfDay"],
                                    weekend=parameters["enc"]["time"]["weekend"])

    scalarEncoderParams = RDSE_Parameters()
    scalarEncoderParams.size = parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]

    self.encValue = RDSE(scalarEncoderParams)
    encodingWidth = (self.encTimestamp.size + self.encValue.size)
    self.enc_info = Metrics([encodingWidth], 999999999)

    # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
    # SpatialPooler
    spParams = parameters["sp"]
    self.sp = SpatialPooler(
      inputDimensions=(encodingWidth,),
      columnDimensions=(spParams["columnCount"],),
      potentialPct=spParams["potentialPct"],
      potentialRadius=encodingWidth,
      globalInhibition=True,
      localAreaDensity=spParams["localAreaDensity"],
      synPermInactiveDec=spParams["synPermInactiveDec"],
      synPermActiveInc=spParams["synPermActiveInc"],
      synPermConnected=spParams["synPermConnected"],
      boostStrength=spParams["boostStrength"],
      wrapAround=True
    )
    self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

    # TemporalMemory
    tmParams = parameters["tm"]
    self.tm = TemporalMemory(
      columnDimensions=(spParams["columnCount"],),
      cellsPerColumn=tmParams["cellsPerColumn"],
      activationThreshold=tmParams["activationThreshold"],
      initialPermanence=tmParams["initialPerm"],
      connectedPermanence=spParams["synPermConnected"],
      minThreshold=tmParams["minThreshold"],
      maxNewSynapseCount=tmParams["newSynapseCount"],
      permanenceIncrement=tmParams["permanenceInc"],
      permanenceDecrement=tmParams["permanenceDec"],
      predictedSegmentDecrement=0.0,
      maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
      maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"]
    )
    self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

    # setup likelihood, these settings are used in NAB
    if self.useLikelihood:
      anParams = parameters["anomaly"]["likelihood"]
      learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
      self.anomalyLikelihood = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=self.probationaryPeriod - learningPeriod,
        reestimationPeriod=anParams["reestimationPeriod"])

    # Predictor
    # self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
    # predictor_resolution = 1

    # initialize pandaBaker
    if PANDA_VIS_BAKE_DATA:
      self.BuildPandaSystem(self.sp, self.tm, parameters["enc"]["value"]["size"], self.encTimestamp.size)

  def modelRun(self, ts, val):
    """
    Run a single pass through the HTM model.
    @params ts - Timestamp
    @params val - float input value
    @return rawAnomalyScore computed for the `val` in this step
    """
    ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
    self.inputs_.append(val)
    self.iteration_ += 1

    # 1. Encoding
    # Call the encoders to create bit representations for each value. These are SDR objects.
    dateBits = self.encTimestamp.encode(ts)
    valueBits = self.encValue.encode(float(val))
    # Concatenate all these encodings into one large encoding for Spatial Pooling.
    encoding = SDR(self.encTimestamp.size + self.encValue.size).concatenate([valueBits, dateBits])
    self.enc_info.addData(encoding)

    # 2. Spatial Pooler
    # Create an SDR to represent active columns. This will be populated by the
    # compute method below. It must have the same dimensions as the Spatial Pooler.
    activeColumns = SDR(self.sp.getColumnDimensions())
    # Execute Spatial Pooling algorithm over input space.
    self.sp.compute(encoding, True, activeColumns)
    self.sp_info.addData(activeColumns)

    # 3. Temporal Memory
    # Execute Temporal Memory algorithm over active mini-columns.
    # To get predictive cells we need to call activateDendrites & activateCells separately.
    if PANDA_VIS_BAKE_DATA:
      # activateDendrites calculates active segments
      self.tm.activateDendrites(learn=True)
      # predictive cells are calculated directly from active segments
      predictiveCells = self.tm.getPredictiveCells()
      # activates cells in columns by TM algorithm (winners, bursting...)
      self.tm.activateCells(activeColumns, learn=True)
    else:
      self.tm.compute(activeColumns, learn=True)

    self.tm_info.addData(self.tm.getActiveCells().flatten())

    # 4.1 (optional) Predictor  # TODO optional
    # TODO optional: also return an error metric on predictions (RMSE, R2, ...)

    # 4.2 Anomaly
    # handle spatial, contextual (raw, likelihood) anomalies
    # -Spatial
    spatialAnomaly = 0.0  # TODO optional: make this computed in SP (and later improve)
    if self.useSpatialAnomaly:
      # Update min/max values and check if there is a spatial anomaly
      if self.minVal != self.maxVal:
        tolerance = (self.maxVal - self.minVal) * SPATIAL_TOLERANCE
        maxExpected = self.maxVal + tolerance
        minExpected = self.minVal - tolerance
        if val > maxExpected or val < minExpected:
          spatialAnomaly = 1.0
      if self.maxVal is None or val > self.maxVal:
        self.maxVal = val
      if self.minVal is None or val < self.minVal:
        self.minVal = val

    # -temporal (raw)
    raw = self.tm.anomaly
    temporalAnomaly = raw

    if self.useLikelihood:
      # Compute log(anomaly likelihood)
      like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
      logScore = self.anomalyLikelihood.computeLogLikelihood(like)
      temporalAnomaly = logScore
      # TODO optional: TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood

    anomalyScore = max(spatialAnomaly, temporalAnomaly)  # this is the "main" anomaly, compared in NAB

    # 5. print stats
    if self.verbose and self.iteration_ % 1000 == 0:
      # print(self.enc_info)
      # print(self.sp_info)
      # print(self.tm_info)
      pass

    # 6. panda vis
    if PANDA_VIS_BAKE_DATA:
      # ------------------HTMpandaVis----------------------
      # see more about this structure at
      # https://github.com/htm-community/HTMpandaVis/blob/master/pandaBaker/README.md
      # fill up values
      pandaBaker.inputs["Value"].stringValue = "value: {:.2f}".format(val)
      pandaBaker.inputs["Value"].bits = valueBits.sparse

      pandaBaker.inputs["TimeOfDay"].stringValue = str(ts)
      pandaBaker.inputs["TimeOfDay"].bits = dateBits.sparse

      pandaBaker.layers["Layer1"].activeColumns = activeColumns.sparse
      pandaBaker.layers["Layer1"].winnerCells = self.tm.getWinnerCells().sparse
      pandaBaker.layers["Layer1"].predictiveCells = predictiveCells.sparse
      pandaBaker.layers["Layer1"].activeCells = self.tm.getActiveCells().sparse

      # customizable datastreams to be shown on the DASH PLOTS
      pandaBaker.dataStreams["rawAnomaly"].value = temporalAnomaly
      pandaBaker.dataStreams["value"].value = val
      pandaBaker.dataStreams["numberOfWinnerCells"].value = len(self.tm.getWinnerCells().sparse)
      pandaBaker.dataStreams["numberOfPredictiveCells"].value = len(predictiveCells.sparse)
      pandaBaker.dataStreams["valueInput_sparsity"].value = valueBits.getSparsity()
      pandaBaker.dataStreams["dateInput_sparsity"].value = dateBits.getSparsity()

      pandaBaker.dataStreams["Layer1_SP_overlap_metric"].value = self.sp_info.overlap.overlap
      pandaBaker.dataStreams["Layer1_TM_overlap_metric"].value = self.tm_info.overlap.overlap
      pandaBaker.dataStreams["Layer1_SP_activation_frequency"].value = self.sp_info.activationFrequency.mean()
      pandaBaker.dataStreams["Layer1_TM_activation_frequency"].value = self.tm_info.activationFrequency.mean()
      pandaBaker.dataStreams["Layer1_SP_entropy"].value = self.sp_info.activationFrequency.entropy()
      pandaBaker.dataStreams["Layer1_TM_entropy"].value = self.tm_info.activationFrequency.entropy()

      pandaBaker.StoreIteration(self.iteration_ - 1)
      print("ITERATION: " + str(self.iteration_ - 1))
      # ------------------HTMpandaVis----------------------

    return (anomalyScore, raw)

  # with this method, the structure for visualization is defined
  def BuildPandaSystem(self, sp, tm, consumptionBits_size, dateBits_size):
    # we have two inputs connected to proximal synapses of Layer1
    pandaBaker.inputs["Value"] = cInput(consumptionBits_size)
    pandaBaker.inputs["TimeOfDay"] = cInput(dateBits_size)

    pandaBaker.layers["Layer1"] = cLayer(sp, tm)  # Layer1 has Spatial Pooler & Temporal Memory
    pandaBaker.layers["Layer1"].proximalInputs = [
      "Value",
      "TimeOfDay",
    ]
    pandaBaker.layers["Layer1"].distalInputs = ["Layer1"]

    # data for dash plots
    streams = ["rawAnomaly", "value", "numberOfWinnerCells", "numberOfPredictiveCells",
               "valueInput_sparsity", "dateInput_sparsity", "Layer1_SP_overlap_metric",
               "Layer1_TM_overlap_metric", "Layer1_SP_activation_frequency",
               "Layer1_TM_activation_frequency", "Layer1_SP_entropy", "Layer1_TM_entropy"]

    # create dicts for more comfortable code
    # could be also written like: pandaBaker.dataStreams["myStreamName"] = cDataStream()
    pandaBaker.dataStreams = dict((name, cDataStream()) for name in streams)

    pandaBaker.PrepareDatabase()
      similar = {"doc": document, "bits": current}
    else:
      if (distance(current, reference["bits"])
          < distance(similar["bits"], reference["bits"])):
        similar = {"doc": document, "bits": current}

    if not unsimilar:
      unsimilar = {"doc": document, "bits": current}
    else:
      if (distance(current, reference["bits"])
          > distance(unsimilar["bits"], reference["bits"])):
        unsimilar = {"doc": document, "bits": current}

report = Metrics([encoder.size], len(sdrs) + 1)
for sdr in sdrs:
  report.addData(sdr)

print("Statistics:")
print("\tEncoded %d Document inputs." % len(sdrs))
print("\tOutput: " + str(report))

print("Similarity:")
print("\tReference:\n\t\t" + str(reference["doc"]))
print("\tMOST Similar (Distance = "
      + str(distance(similar["bits"], reference["bits"])) + "):")
print("\t\t" + str(similar["doc"]))
print("\tLEAST Similar (Distance = "
      + str(distance(unsimilar["bits"], reference["bits"])) + "):")
print("\t\t" + str(unsimilar["doc"]))

# Plot the Receptive Field of each bit in the encoder.
def main(parameters=default_parameters, argv=None, verbose=True):
  if verbose:
    import pprint
    print("Parameters:")
    pprint.pprint(parameters, indent=4)
    print("")

  # Read the input file.
  records = []
  with open(_INPUT_FILE_PATH, "r") as fin:
    reader = csv.reader(fin)
    headers = next(reader)
    next(reader)
    next(reader)
    for record in reader:
      records.append(record)

  # Make the Encoders. These will convert input data into binary representations.
  dateEncoder = DateEncoder(timeOfDay=parameters["enc"]["time"]["timeOfDay"],
                            weekend=parameters["enc"]["time"]["weekend"])

  scalarEncoderParams = RDSE_Parameters()
  scalarEncoderParams.size = parameters["enc"]["value"]["size"]
  scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
  scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
  scalarEncoder = RDSE(scalarEncoderParams)
  encodingWidth = (dateEncoder.size + scalarEncoder.size)
  enc_info = Metrics([encodingWidth], 999999999)

  # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
  spParams = parameters["sp"]
  sp = SpatialPooler(
    inputDimensions=(encodingWidth,),
    columnDimensions=(spParams["columnCount"],),
    potentialPct=spParams["potentialPct"],
    potentialRadius=encodingWidth,
    globalInhibition=True,
    localAreaDensity=spParams["localAreaDensity"],
    synPermInactiveDec=spParams["synPermInactiveDec"],
    synPermActiveInc=spParams["synPermActiveInc"],
    synPermConnected=spParams["synPermConnected"],
    boostStrength=spParams["boostStrength"],
    wrapAround=True
  )
  sp_info = Metrics(sp.getColumnDimensions(), 999999999)

  tmParams = parameters["tm"]
  tm = TemporalMemory(
    columnDimensions=(spParams["columnCount"],),
    cellsPerColumn=tmParams["cellsPerColumn"],
    activationThreshold=tmParams["activationThreshold"],
    initialPermanence=tmParams["initialPerm"],
    connectedPermanence=spParams["synPermConnected"],
    minThreshold=tmParams["minThreshold"],
    maxNewSynapseCount=tmParams["newSynapseCount"],
    permanenceIncrement=tmParams["permanenceInc"],
    permanenceDecrement=tmParams["permanenceDec"],
    predictedSegmentDecrement=0.0,
    maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
    maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"]
  )
  tm_info = Metrics([tm.numberOfCells()], 999999999)

  # setup likelihood, these settings are used in NAB
  anParams = parameters["anomaly"]["likelihood"]
  probationaryPeriod = int(math.floor(float(anParams["probationaryPct"]) * len(records)))
  learningPeriod = int(math.floor(probationaryPeriod / 2.0))
  anomaly_history = AnomalyLikelihood(learningPeriod=learningPeriod,
                                      estimationSamples=probationaryPeriod - learningPeriod,
                                      reestimationPeriod=anParams["reestimationPeriod"])

  predictor = Predictor(steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'])
  predictor_resolution = 1

  # Iterate through every datum in the dataset, record the inputs & outputs.
  inputs = []
  anomaly = []
  anomalyProb = []
  predictions = {1: [], 5: []}
  for count, record in enumerate(records):

    # Convert date string into Python date object.
    dateString = datetime.datetime.strptime(record[0], "%m/%d/%y %H:%M")
    # Convert data value string into float.
    consumption = float(record[1])
    inputs.append(consumption)

    # Call the encoders to create bit representations for each value. These are SDR objects.
    dateBits = dateEncoder.encode(dateString)
    consumptionBits = scalarEncoder.encode(consumption)

    # Concatenate all these encodings into one large encoding for Spatial Pooling.
    encoding = SDR(encodingWidth).concatenate([consumptionBits, dateBits])
    enc_info.addData(encoding)

    # Create an SDR to represent active columns. This will be populated by the
    # compute method below. It must have the same dimensions as the Spatial Pooler.
    activeColumns = SDR(sp.getColumnDimensions())

    # Execute Spatial Pooling algorithm over input space.
    sp.compute(encoding, True, activeColumns)
    sp_info.addData(activeColumns)

    # Execute Temporal Memory algorithm over active mini-columns.
    tm.compute(activeColumns, learn=True)
    tm_info.addData(tm.getActiveCells().flatten())

    # Predict what will happen, and then train the predictor based on what just happened.
    pdf = predictor.infer(count, tm.getActiveCells())
    for n in (1, 5):
      if pdf[n]:
        predictions[n].append(np.argmax(pdf[n]) * predictor_resolution)
      else:
        predictions[n].append(float('nan'))
    predictor.learn(count, tm.getActiveCells(), int(consumption / predictor_resolution))

    anomalyLikelihood = anomaly_history.anomalyProbability(consumption, tm.anomaly)
    anomaly.append(tm.anomaly)
    anomalyProb.append(anomalyLikelihood)

  # Print information & statistics about the state of the HTM.
  print("Encoded Input", enc_info)
  print("")
  print("Spatial Pooler Mini-Columns", sp_info)
  print(str(sp))
  print("")
  print("Temporal Memory Cells", tm_info)
  print(str(tm))
  print("")

  # Shift the predictions so that they are aligned with the input they predict.
  for n_steps, pred_list in predictions.items():
    for x in range(n_steps):
      pred_list.insert(0, float('nan'))
      pred_list.pop()

  # Calculate the predictive accuracy, Root-Mean-Squared.
  accuracy = {1: 0, 5: 0}
  accuracy_samples = {1: 0, 5: 0}
  for idx, inp in enumerate(inputs):
    for n in predictions:  # For each [N]umber of time steps ahead which was predicted.
      val = predictions[n][idx]
      if not math.isnan(val):
        accuracy[n] += (inp - val) ** 2
        accuracy_samples[n] += 1
  for n in sorted(predictions):
    accuracy[n] = (accuracy[n] / accuracy_samples[n]) ** .5
    print("Predictive Error (RMS)", n, "steps ahead:", accuracy[n])

  # Show info about the anomaly (mean & std)
  print("Anomaly Mean", np.mean(anomaly))
  print("Anomaly Std ", np.std(anomaly))

  # Plot the Predictions and Anomalies.
  if verbose:
    try:
      import matplotlib.pyplot as plt
    except:
      print("WARNING: failed to import matplotlib, plots cannot be shown.")
      return -accuracy[5]

    plt.subplot(2, 1, 1)
    plt.title("Predictions")
    plt.xlabel("Time")
    plt.ylabel("Power Consumption")
    plt.plot(np.arange(len(inputs)), inputs, 'red',
             np.arange(len(inputs)), predictions[1], 'blue',
             np.arange(len(inputs)), predictions[5], 'green')
    plt.legend(labels=('Input', '1 Step Prediction, Shifted 1 step',
                       '5 Step Prediction, Shifted 5 steps'))

    plt.subplot(2, 1, 2)
    plt.title("Anomaly Score")
    plt.xlabel("Time")
    plt.ylabel("Power Consumption")
    inputs = np.array(inputs) / max(inputs)
    plt.plot(np.arange(len(inputs)), inputs, 'red',
             np.arange(len(inputs)), anomaly, 'blue')
    plt.legend(labels=('Input', 'Anomaly Score'))
    plt.show()

  return -accuracy[5]
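# Minimal entry point sketch (the original script likely ends with a standard
# guard like this, possibly with argument parsing): run the example above with
# the default parameters defined earlier in the file.
if __name__ == "__main__":
  main()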
def testStatistics(self):
  # 100 random simple English words to run mass encoding stats against
  testCorpus = [
    "find", "any", "new", "work", "part", "take", "get", "place", "made",
    "live", "where", "after", "back", "little", "only", "round", "man",
    "year", "came", "show", "every", "good", "me", "give", "our", "under",
    "name", "very", "through", "just", "form", "sentence", "great", "think",
    "say", "help", "low", "line", "differ", "turn", "cause", "much", "mean",
    "before", "move", "right", "boy", "old", "too", "same", "tell", "does",
    "set", "three", "want", "air", "well", "also", "play", "small", "end",
    "put", "home", "read", "hand", "port", "large", "spell", "add", "even",
    "land", "here", "must", "big", "high", "such", "follow", "act", "why",
    "ask", "men", "change", "went", "light", "kind", "off", "need", "house",
    "picture", "try", "us", "again", "animal", "point", "mother", "world",
    "near", "build", "self", "earth"]
  num_samples = 1000  # number of documents to run
  num_tokens = 10     # tokens per document

  # Case 1 = tokenSimilarity OFF
  params1 = SimHashDocumentEncoderParameters()
  params1.size = 400
  params1.sparsity = 0.33
  params1.tokenSimilarity = False
  encoder1 = SimHashDocumentEncoder(params1)

  # Case 2 = tokenSimilarity ON
  # (params2 aliases params1, but encoder1 was already constructed above)
  params2 = params1
  params2.tokenSimilarity = True
  encoder2 = SimHashDocumentEncoder(params2)

  sdrs1 = []
  sdrs2 = []
  for _ in range(num_samples):
    document = []
    for _ in range(num_tokens - 1):
      token = testCorpus[random.randint(0, len(testCorpus) - 1)]
      document.append(token)
    sdrs1.append(encoder1.encode(document))
    sdrs2.append(encoder2.encode(document))

  report1 = Metrics([encoder1.size], len(sdrs1) + 1)
  report2 = Metrics([encoder2.size], len(sdrs2) + 1)
  for sdr in sdrs1:
    report1.addData(sdr)
  for sdr in sdrs2:
    report2.addData(sdr)

  # Assertions for Case 1 = tokenSimilarity OFF
  assert(report1.activationFrequency.entropy() > 0.87)
  assert(report1.activationFrequency.min() > 0.01)
  assert(report1.activationFrequency.max() < 0.99)
  assert(report1.activationFrequency.mean() > params1.sparsity - 0.005)
  assert(report1.activationFrequency.mean() < params1.sparsity + 0.005)
  assert(report1.overlap.min() > 0.21)
  assert(report1.overlap.max() > 0.53)
  assert(report1.overlap.mean() > 0.38)
  assert(report1.sparsity.min() > params1.sparsity - 0.01)
  assert(report1.sparsity.max() < params1.sparsity + 0.01)
  assert(report1.sparsity.mean() > params1.sparsity - 0.005)
  assert(report1.sparsity.mean() < params1.sparsity + 0.005)

  # Assertions for Case 2 = tokenSimilarity ON
  assert(report2.activationFrequency.entropy() > 0.59)
  assert(report2.activationFrequency.min() >= 0)
  assert(report2.activationFrequency.max() <= 1)
  assert(report2.activationFrequency.mean() > params2.sparsity - 0.005)
  assert(report2.activationFrequency.mean() < params2.sparsity + 0.005)
  assert(report2.overlap.min() > 0.38)
  assert(report2.overlap.max() > 0.78)
  assert(report2.overlap.mean() > 0.61)
  assert(report2.sparsity.min() > params2.sparsity - 0.01)
  assert(report2.sparsity.max() < params2.sparsity + 0.01)
  assert(report2.sparsity.mean() > params2.sparsity - 0.005)
  assert(report2.sparsity.mean() < params2.sparsity + 0.005)
class HTMCoreDetector(object):
  def __init__(self, inputMin, inputMax, probationaryPeriod, *args, **kwargs):
    self.inputMin = inputMin
    self.inputMax = inputMax
    self.probationaryPeriod = probationaryPeriod

    ## API for controlling settings of htm.core HTM detector:

    # Set this to False if you want to get results based on raw scores
    # without using AnomalyLikelihood. This will give worse results, but
    # is useful for checking the efficacy of AnomalyLikelihood. You will need
    # to re-optimize the thresholds when running with this setting.
    self.useLikelihood = True
    self.verbose = False

    ## internal members
    # (listed here for easier understanding)
    # initialized in `initialize()`
    self.encTimestamp = None
    self.encValue = None
    self.sp = None
    self.tm = None
    self.anLike = None
    # optional debug info
    self.enc_info = None
    self.sp_info = None
    self.tm_info = None
    # internal helper variables:
    self.inputs_ = []
    self.iteration_ = 0

  def handleRecord(self, ts, val):
    """Returns a tuple (anomalyScore, rawScore).

    @param ts Timestamp
    @param val float

    @return tuple (anomalyScore, <any other fields specified in `getAdditionalHeaders()`>, ...)
    """
    # Send it to Numenta detector and get back the results
    return self.modelRun(ts, val)

  def initialize(self):
    # toggle parameters here
    # parameters = default_parameters
    parameters = parameters_numenta_comparable

    ## setup Enc, SP, TM, Likelihood
    # Make the Encoders. These will convert input data into binary representations.
    self.encTimestamp = DateEncoder(
      timeOfDay=parameters["enc"]["time"]["timeOfDay"],
      weekend=parameters["enc"]["time"]["weekend"],
      season=parameters["enc"]["time"]["season"],
      dayOfWeek=parameters["enc"]["time"]["dayOfWeek"])

    scalarEncoderParams = EncParameters()
    scalarEncoderParams.size = parameters["enc"]["value"]["size"]
    scalarEncoderParams.sparsity = parameters["enc"]["value"]["sparsity"]
    scalarEncoderParams.resolution = parameters["enc"]["value"]["resolution"]
    self.encValue = Encoder(scalarEncoderParams)

    encodingWidth = (self.encTimestamp.size + self.encValue.size)
    self.enc_info = Metrics([encodingWidth], 999999999)

    # Make the HTM. SpatialPooler & TemporalMemory & associated tools.
    # SpatialPooler
    spParams = parameters["sp"]
    self.sp = SpatialPooler(
      inputDimensions=(encodingWidth,),
      columnDimensions=(spParams["columnCount"],),
      potentialPct=spParams["potentialPct"],
      potentialRadius=spParams["potentialRadius"],
      globalInhibition=True,
      localAreaDensity=spParams["localAreaDensity"],
      stimulusThreshold=spParams["stimulusThreshold"],
      synPermInactiveDec=spParams["synPermInactiveDec"],
      synPermActiveInc=spParams["synPermActiveInc"],
      synPermConnected=spParams["synPermConnected"],
      boostStrength=spParams["boostStrength"],
      wrapAround=True)
    self.sp_info = Metrics(self.sp.getColumnDimensions(), 999999999)

    # TemporalMemory
    tmParams = parameters["tm"]
    self.tm = TemporalMemory(
      columnDimensions=(spParams["columnCount"],),
      cellsPerColumn=tmParams["cellsPerColumn"],
      activationThreshold=tmParams["activationThreshold"],
      initialPermanence=tmParams["initialPerm"],
      connectedPermanence=spParams["synPermConnected"],
      minThreshold=tmParams["minThreshold"],
      maxNewSynapseCount=tmParams["newSynapseCount"],
      permanenceIncrement=tmParams["permanenceInc"],
      permanenceDecrement=tmParams["permanenceDec"],
      predictedSegmentDecrement=0.0,
      maxSegmentsPerCell=tmParams["maxSegmentsPerCell"],
      maxSynapsesPerSegment=tmParams["maxSynapsesPerSegment"])
    self.tm_info = Metrics([self.tm.numberOfCells()], 999999999)

    # setup likelihood, these settings are used in NAB
    if self.useLikelihood:
      anParams = parameters["anomaly"]["likelihood"]
      learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
      self.anomalyLikelihood = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=self.probationaryPeriod - learningPeriod,
        reestimationPeriod=anParams["reestimationPeriod"])

    # Predictor
    # self.predictor = Predictor( steps=[1, 5], alpha=parameters["predictor"]['sdrc_alpha'] )
    # predictor_resolution = 1

  def modelRun(self, ts, val):
    """
    Run a single pass through the HTM model.
    @params ts - Timestamp
    @params val - float input value
    @return rawAnomalyScore computed for the `val` in this step
    """
    ## run data through our model pipeline: enc -> SP -> TM -> Anomaly
    self.inputs_.append(val)
    self.iteration_ += 1

    # 1. Encoding
    # Call the encoders to create bit representations for each value. These are SDR objects.
    dateBits = self.encTimestamp.encode(ts)
    valueBits = self.encValue.encode(float(val))
    # Concatenate all these encodings into one large encoding for Spatial Pooling.
    encoding = SDR(self.encTimestamp.size + self.encValue.size).concatenate([valueBits, dateBits])
    self.enc_info.addData(encoding)

    # 2. Spatial Pooler
    # Create an SDR to represent active columns. This will be populated by the
    # compute method below. It must have the same dimensions as the Spatial Pooler.
    activeColumns = SDR(self.sp.getColumnDimensions())
    # Execute Spatial Pooling algorithm over input space.
    self.sp.compute(encoding, True, activeColumns)
    self.sp_info.addData(activeColumns)

    # 3. Temporal Memory
    # Execute Temporal Memory algorithm over active mini-columns.
    self.tm.compute(activeColumns, learn=True)
    self.tm_info.addData(self.tm.getActiveCells().flatten())

    # 4.1 (optional) Predictor  # TODO optional
    # TODO optional: also return an error metric on predictions (RMSE, R2, ...)

    # 4.2 Anomaly
    # handle contextual (raw, likelihood) anomalies
    # -temporal (raw)
    raw = self.tm.anomaly
    temporalAnomaly = raw

    if self.useLikelihood:
      # Compute log(anomaly likelihood)
      like = self.anomalyLikelihood.anomalyProbability(val, raw, ts)
      logScore = self.anomalyLikelihood.computeLogLikelihood(like)
      temporalAnomaly = logScore
      # TODO optional: TM to provide anomaly {none, raw, likelihood}, compare correctness with the py anomaly_likelihood

    anomalyScore = temporalAnomaly  # this is the "main" anomaly, compared in NAB

    # 5. print stats
    if self.verbose and self.iteration_ % 1000 == 0:
      print(self.enc_info)
      print(self.sp_info)
      print(self.tm_info)

    return anomalyScore, raw
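# Illustrative usage sketch (not part of the original detector): the caller
# constructs the detector with the value range and probationary period, calls
# initialize() once, and then feeds (timestamp, value) pairs to handleRecord().
# Assumes `parameters_numenta_comparable` and the `Encoder`/`EncParameters`
# aliases are defined by the imports at the top of this file; the synthetic
# daily cycle below is a demo choice.
if __name__ == "__main__":
  import datetime

  detector = HTMCoreDetector(inputMin=0.0, inputMax=100.0, probationaryPeriod=150)
  detector.initialize()

  start = datetime.datetime(2020, 1, 1)
  for i in range(1000):
    ts = start + datetime.timedelta(hours=i)
    val = 50.0 + 25.0 * math.sin(i * 2.0 * math.pi / 24.0)  # synthetic daily cycle
    anomalyScore, rawScore = detector.handleRecord(ts, val)
    if i % 100 == 0:
      print("{}  value={:6.2f}  anomaly={:.3f}  raw={:.3f}".format(ts, val, anomalyScore, rawScore))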