示例#1
0
文件: anomaly.py 项目: tusharp/nupic
  def __init__(self, slidingWindowSize=None, mode=MODE_PURE, binaryAnomalyThreshold=None):
    """
    Configure how anomaly scores are computed and post-processed.

    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; None disables it
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how much anomal the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    @param binaryAnomalyThreshold (optional) - float strictly between 0 and 1;
         if set, the anomaly score will be discretized to 1/0
         (1 if >= binaryAnomalyThreshold).
         The transformation is applied after moving average is computed and
         updated.
    @raises ValueError if mode is not one of the supported modes, or if
         binaryAnomalyThreshold is neither None nor a float in (0, 1)
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if self._mode not in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % (self._mode,))

    # Always define the attribute so that code reading self._likelihood does
    # not hit AttributeError on a "pure" mode instance.
    if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    else:
      self._likelihood = None

    self._binaryThreshold = binaryAnomalyThreshold
    if binaryAnomalyThreshold is not None and (
          not isinstance(binaryAnomalyThreshold, float) or
          binaryAnomalyThreshold >= 1.0 or
          binaryAnomalyThreshold <= 0.0):
      raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                       "or None if disabled.")
示例#2
0
def compute_scores(y_test, y_pred, normalize=False):
    """Score predictions with squared errors and anomaly log-likelihoods.

    :param y_test: observed values; must support ``y_test - y_pred`` and
        iteration (e.g. a NumPy array).
    :param y_pred: predicted values aligned with ``y_test``.
    :param normalize: when True, divide the squared errors by their range
        (``max - min``). Scaling is skipped when there are no errors or when
        they are all equal, because the range is then undefined or zero.
    :returns: tuple ``(errors, log_likelihoods, anomalies)``. ``errors`` is
        the array of (optionally scaled) squared errors, ``log_likelihoods``
        is a list with one entry per sample, and ``anomalies`` maps
        ``'high'`` and ``'medium'`` to 0/1 flag arrays. ``'high'`` marks
        log-likelihoods >= 0.5; ``'medium'`` is currently never flagged and
        stays all zeros.
    """
    errors = np.array((y_test - y_pred)**2)
    if normalize and errors.size:
        span = float(errors.max() - errors.min())
        # A zero span would turn every error into inf/NaN and poison the
        # likelihood model, so constant errors are left unscaled.
        if span:
            errors = errors / span

    # One likelihood model is fed the samples in order; pairing values with
    # errors positionally keeps the two sequences in lockstep.
    anomaly_likelihood = AnomalyLikelihood()
    log_likelihoods = []
    for value, error in zip(y_test, errors):
        likelihood = anomaly_likelihood.anomalyProbability(value,
                                                           error,
                                                           timestamp=None)
        log_likelihoods.append(
            anomaly_likelihood.computeLogLikelihood(likelihood))

    # Anomaly thresholds:
    # - HIGH: log_likelihood >= 0.5
    # - MEDIUM (0.5 > log_likelihood >= 0.4) is reserved but not flagged.
    count = len(log_likelihoods)
    anomalies = {'high': np.zeros(count), 'medium': np.zeros(count)}
    scores = np.array(log_likelihoods)
    anomalies['high'][scores >= 0.5] = 1

    return errors, log_likelihoods, anomalies
示例#3
0
    def __init__(self,
                 slidingWindowSize=None,
                 mode=MODE_PURE,
                 binaryAnomalyThreshold=None):
        """
        Store the anomaly configuration and build the helpers it needs.

        @param slidingWindowSize (optional) - window size handed to
            MovingAverage; None means no moving average is kept
        @param mode (optional) - anomaly mode; must be one of
            self._supportedModes
        @param binaryAnomalyThreshold (optional) - None, or a float strictly
            between 0 and 1
        @raises ValueError for an unsupported mode or an invalid threshold
        """
        self._mode = mode

        # A moving average only exists when a window size was supplied.
        self._movingAverage = (None if slidingWindowSize is None else
                               MovingAverage(windowSize=slidingWindowSize))

        # Only the likelihood-based modes need the probabilistic helper.
        likelihoodModes = (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED)
        self._likelihood = (AnomalyLikelihood()
                            if self._mode in likelihoodModes else None)

        if self._mode not in self._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" %
                             self._mode)

        self._binaryThreshold = binaryAnomalyThreshold
        if binaryAnomalyThreshold is None:
            return

        thresholdIsValid = (isinstance(binaryAnomalyThreshold, float)
                            and not binaryAnomalyThreshold >= 1.0
                            and not binaryAnomalyThreshold <= 0.0)
        if not thresholdIsValid:
            raise ValueError(
                "Anomaly: binaryAnomalyThreshold must be from (0,1) "
                "or None if disabled.")
示例#4
0
  def __init__(self, slidingWindowSize = None, mode=MODE_PURE):
    """
    Configure how anomaly scores are computed and post-processed.

    @param slidingWindowSize (optional) - how many elements are summed up;
        enables moving average on final anomaly score; None disables it
    @param mode (optional) - (string) how to compute anomaly;
        possible values are:
          - "pure" - the default, how much anomal the value is;
              float 0..1 where 1=totally unexpected
          - "likelihood" - uses the anomaly_likelihood code;
              models probability of receiving this value and anomalyScore
          - "weighted" - "pure" anomaly weighted by "likelihood"
              (anomaly * likelihood)
    @raises ValueError if mode is not one of the supported modes
    """
    self._mode = mode
    if slidingWindowSize is not None:
      self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
    else:
      self._movingAverage = None

    if self._mode not in Anomaly._supportedModes:
      raise ValueError("Invalid anomaly mode; only supported modes are: "
                       "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                       "Anomaly.MODE_WEIGHTED; you used: %r" % (self._mode,))

    # Always define the attribute so that code reading self._likelihood does
    # not hit AttributeError on a "pure" mode instance.
    if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
      self._likelihood = AnomalyLikelihood() # probabilistic anomaly
    else:
      self._likelihood = None
示例#5
0
def runAvogadroAnomaly(metric, options):
    """
    Create a new HTM Model, fetch the data from the local DB, process it in
    NuPIC, and save the results to a new CSV output file.

    The output file is ``<options.prefix>/<metric.name>-result.csv`` with the
    columns timestamp, metric value, raw anomaly score, anomaly log-likelihood
    and a color ("red" above 0.5, "yellow" above 0.4, otherwise "green").
    Samples whose value cannot be converted to a float, or is NaN, are
    skipped.

    :param metric: AvogadroAgent metric class
    :param options: CLI Options
    """
    model = createModel(metric)
    model.enableInference({"predictedField": metric.name})

    fetched = metric.fetch(prefix=options.prefix, start=None)

    resultPath = os.path.join(options.prefix, metric.name + "-result.csv")
    anomalyLikelihood = AnomalyLikelihood()

    # The context manager guarantees the file is flushed and closed even if
    # the model raises part-way through. Binary mode is kept as-is: it is
    # what the Python 2 csv module expects.
    with open(resultPath, "wb") as resultFile:
        csvWriter = csv.writer(resultFile)
        csvWriter.writerow([
            "timestamp", metric.name, "raw_anomaly_score",
            "anomaly_likelihood", "color"
        ])

        for (ts, value) in fetched:
            try:
                value = float(value)
            except (ValueError, TypeError):
                continue

            if math.isnan(value):
                continue

            timestamp = datetime.datetime.fromtimestamp(float(ts))
            modelInput = {"timestamp": timestamp, metric.name: value}
            result = model.run(modelInput)
            anomalyScore = result.inferences["anomalyScore"]

            likelihood = anomalyLikelihood.anomalyProbability(
                value, anomalyScore, timestamp)
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

            if logLikelihood > .5:
                color = "red"
            elif logLikelihood > .4:
                color = "yellow"
            else:
                color = "green"

            csvWriter.writerow(
                [timestamp, value, anomalyScore, logLikelihood, color])
示例#6
0
    def get_anomaly_likelihood_calc(self, metric,
                                    models_number_below_configured_limit):
        """Return the anomaly likelihood calculator registered for a metric.

        When no calculator is registered yet and
        ``models_number_below_configured_limit`` is truthy, one is registered
        first: restored from disk if a saved calculator file exists, with a
        fresh ``AnomalyLikelihood`` as fallback when restoring fails or no
        file exists.

        :param metric: mapping with at least a ``"metric_name"`` entry.
        :param models_number_below_configured_limit: whether a new calculator
            may be registered.
        :returns: the registered calculator, or None when there is none.
        """
        metric_name = metric["metric_name"]
        registry = self.__loaded_models

        if not registry.anomaly_calc_exists(metric_name):
            save_dir = self.__model_storage_manager.get_save_path(
                metric_name, path_element="anomaly_likelihood_calculator")
            saved_file = os.path.join(
                save_dir, self._anomaly_likelihood_calculator_filename)
            has_saved_calc = os.path.isfile(saved_file)

            if models_number_below_configured_limit and has_saved_calc:
                try:
                    restored = (self.__anomaly_likelihood_calculator_factory.
                                create_anomaly_likelihood_calc_from_disk(
                                    metric))
                    registry.add_anomaly_calc_for_metric(metric_name, restored)
                    self._logger.debug(
                        "get_anomaly_likelihood_calc",
                        "LOADED ANOMALY_LIKELIHOOD_CALC FROM FILE",
                        metric=str(metric_name))
                except Exception as ex:
                    # Best effort: an unreadable saved calculator is replaced
                    # by a fresh one and the failure is logged.
                    registry.add_anomaly_calc_for_metric(
                        metric_name, AnomalyLikelihood())
                    self._logger.warn(
                        "get_anomaly_likelihood_calc",
                        "Failed to create an anomaly likelihood calc from disk",
                        metric=str(metric_name),
                        exception_type=str(type(ex).__name__),
                        exception_message=str(ex.message))
            elif models_number_below_configured_limit:
                registry.add_anomaly_calc_for_metric(
                    metric_name, AnomalyLikelihood())

        if registry.anomaly_calc_exists(metric_name):
            return registry.get_anomaly_calc(metric_name)
        return None
 def __init__(self,
              learningPeriod=288,
              estimationSamples=100,
              historicWindowSize=8640,
              reestimationPeriod=100):
     """Create the wrapped AnomalyLikelihood.

     All four arguments are forwarded unchanged, by keyword, to the
     AnomalyLikelihood constructor; the instance is stored on
     ``self.anomalyLikelihood``.
     """
     likelihoodSettings = dict(
         learningPeriod=learningPeriod,
         estimationSamples=estimationSamples,
         historicWindowSize=historicWindowSize,
         reestimationPeriod=reestimationPeriod,
     )
     self.anomalyLikelihood = AnomalyLikelihood(**likelihoodSettings)
示例#8
0
 def __init__(self):
     """Load model parameters from disk and build a learning model.

     Reads ``model_params.json`` from the current working directory, prints
     the parameters, creates the model with learning enabled and inference
     on the ``"value"`` field, and prepares an AnomalyLikelihood instance.
     """
     # Alternative left by the author: the parameters could instead come from
     # getScalarMetricWithTimeOfDayAnomalyParams(metricData=[0],
     # tmImplementation="cpp").
     with open("model_params.json") as paramsFile:
         self.model_params = json.load(paramsFile)
     print(self.model_params)

     self.newmodel = model = ModelFactory.create(self.model_params)
     model.enableLearning()
     model.enableInference({"predictedField": "value"})

     self.DATE_FORMAT = "%d/%m/%Y %H:%M"
     self.anomalylikelihood = AnomalyLikelihood()
示例#9
0
def definir_AnomDetect(N_DATA):
    """
    Build the anomaly-detection helpers and their result buffers.

    Returns a tuple of: the Anomaly score object, the AnomalyLikelihood
    object, and two zero-filled arrays of length ``N_DATA + 1`` meant to hold
    the anomaly scores and the anomaly log-likelihood scores.
    """
    buffer_shape = (N_DATA + 1,)
    anom_score_txt = np.zeros(buffer_shape)
    anom_logscore_txt = np.zeros(buffer_shape)

    score_calculator = Anomaly(slidingWindowSize=25)
    likelihood_calculator = AnomalyLikelihood(learningPeriod=600,
                                              historicWindowSize=313)

    return (score_calculator, likelihood_calculator, anom_score_txt,
            anom_logscore_txt)
示例#10
0
  def __init__(self, modelId, stats, replaceParams=()):
    """
    Set up the record encoder, the model and the anomaly likelihood helper.

    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    :param sequence replaceParams: Parameter replacement PATH REPLACEMENT pairs
    """
    self._modelId = modelId

    # Encoder for incoming records, built from self._INPUT_RECORD_SCHEMA.
    recordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)
    self._modelRecordEncoder = recordEncoder

    model = self._createModel(stats=stats, replaceParams=replaceParams)
    self._model = model

    self._anomalyLikelihood = AnomalyLikelihood()
示例#11
0
  def __init__(self, modelId, stats):
    """
    Set up the record encoder, the model and the anomaly likelihood helper.

    :param str modelId: model identifier
    :param dict stats: Metric data stats per stats_schema.json in the
      unicorn_backend package.
    """
    self._modelId = modelId

    # NOTE: ModelRecordEncoder comes from the pull request
    # https://github.com/numenta/nupic/pull/2432, which was not yet in master
    # when this was written.
    recordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)
    self._modelRecordEncoder = recordEncoder

    model = self._createModel(stats=stats)
    self._model = model

    self._anomalyLikelihood = AnomalyLikelihood()
示例#12
0
    def _send_predictions(self, metric_id, metric_envelope):
        """Run one metric sample through its model and publish the results.

        A model, an inference shifter and an anomaly likelihood tracker are
        created the first time ``metric_id`` is seen. Up to three messages
        are then sent to ``self._topic``; each one is ``metric_envelope``
        serialized after its metric name has been suffixed and its value
        replaced:

          - ``.nupic.predicted`` when entry 5 of the shifted multi-step best
            predictions is not None
          - ``.nupic.anomaly_score`` and ``.nupic.anomaly_likelihood`` when
            the inferences contain an anomaly score

        ``metric_envelope['metric']`` is modified in place.
        """
        if metric_id not in self._models:
            self._models[metric_id] = ModelFactory.create(self.model_params)
            self._models[metric_id].enableInference(
                {'predictedField': 'value'})
            self._shifters[metric_id] = InferenceShifter()
            self._anomaly_likelihood[metric_id] = AnomalyLikelihood()

        # The sample is stamped with the current wall-clock time rather than
        # with a timestamp taken from the envelope.
        model_input = {
            'dttm': datetime.datetime.now(),
            'value': metric_envelope['metric']['value']
        }

        raw_result = self._models[metric_id].run(model_input)
        inferences = self._shifters[metric_id].shift(raw_result).inferences
        predicted = inferences['multiStepBestPredictions'][5]

        metric = metric_envelope['metric']
        base_name = metric['name']

        def publish(suffix, value):
            # Re-use the envelope: rename the metric, swap the value, send.
            metric['name'] = base_name + suffix
            metric['value'] = value
            payload = simplejson.dumps(metric_envelope)
            self._producer.send_messages(self._topic, payload)

        if predicted is not None:
            publish('.nupic.predicted', predicted)

        if 'anomalyScore' not in inferences:
            return

        publish('.nupic.anomaly_score', inferences['anomalyScore'])

        tracker = self._anomaly_likelihood[metric_id]
        likelihood = tracker.anomalyProbability(
            model_input['value'], inferences['anomalyScore'],
            datetime.datetime.now())
        publish('.nupic.anomaly_likelihood', likelihood)
示例#13
0
  def __init__(self, inputFileObj, inputSpec, aggSpec, modelSpec):
    """
    Wire up aggregation, record encoding, the model, anomaly likelihood and
    the CSV reader for one input stream.

    :param inputFileObj: A file-like object that contains input metric data
    :param dict inputSpec: Input data specification per input_opt_schema.json
    :param dict aggSpec: Optional aggregation specification per
      agg_opt_schema.json or None if no aggregation is requested
    :param dict modelSpec: Model specification per model_opt_schema.json
    """
    self._inputSpec = inputSpec
    self._aggSpec = aggSpec
    self._modelSpec = modelSpec

    # Fall back to a placeholder id when the spec does not carry one.
    self._modelId = (modelSpec["modelId"] if "modelId" in modelSpec
                     else "Unknown")

    timestampField = fieldmeta.FieldMetaInfo(
      modelSpec["timestampFieldName"],
      fieldmeta.FieldMetaType.datetime,
      fieldmeta.FieldMetaSpecial.timestamp)
    valueField = fieldmeta.FieldMetaInfo(
      modelSpec["valueFieldName"],
      fieldmeta.FieldMetaType.float,
      fieldmeta.FieldMetaSpecial.none)
    inputRecordSchema = (timestampField, valueField)

    # Without an aggregation spec the aggregator gets no fields and a
    # zero-second window.
    if aggSpec is not None:
      aggFields = [(modelSpec["valueFieldName"], aggSpec["func"])]
      aggSeconds = aggSpec["windowSize"]
    else:
      aggFields = []
      aggSeconds = 0

    self._aggregator = aggregator.Aggregator(
      aggregationInfo=dict(fields=aggFields, seconds=aggSeconds),
      inputFields=inputRecordSchema)

    self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=inputRecordSchema)

    self._model = self._createModel(modelSpec=modelSpec)

    self._anomalyLikelihood = AnomalyLikelihood()

    self._csvReader = self._createCsvReader(inputFileObj)
示例#14
0
def main():
    """Instantiate each building block with its defaults and print its type."""
    # Spatial pooler: clusters similar inputs together in SDR space.
    spatial_pooler = SpatialPooler()
    print(type(spatial_pooler))

    # Temporal memory: sequence memory in SDR space.
    temporal_memory = TemporalMemory()
    print(type(temporal_memory))

    # Anomaly likelihood: rolling Gaussian over raw anomaly scores, then
    # their likelihood.
    anomaly_likelihood = AnomalyLikelihood()
    print(type(anomaly_likelihood))

    # Union temporal pooler: temporally groups active cell sets from the TM.
    union_pooler = UnionTemporalPooler()
    print(type(union_pooler))

    # SDR classifier: learns pairings of union representations and labels.
    classifier = SDRClassifier()
    print(type(classifier))
    def testLikelihoodValues(self):
        """Check that the region reproduces the AnomalyLikelihood module.

        Every data row of the input file is fed both to a plain
        AnomalyLikelihood and to an AnomalyLikelihoodRegion; the two
        likelihoods must be equal for each row.
        """
        region = AnomalyLikelihoodRegion()
        reference = AnomalyLikelihood()

        inputs = AnomalyLikelihoodRegion.getSpec()['inputs']
        outputs = AnomalyLikelihoodRegion.getSpec()['outputs']
        with open(_INPUT_DATA_FILE) as dataFile:
            rows = csv.reader(dataFile)
            next(rows)  # the first row is skipped
            for row in rows:
                consumption = float(row[1])
                anomalyScore = float(row[2])
                expected = reference.anomalyProbability(
                    consumption, anomalyScore)

                inputs['rawAnomalyScore'] = numpy.array([anomalyScore])
                inputs['metricValue'] = numpy.array([consumption])
                region.compute(inputs, outputs)
                actual = outputs['anomalyLikelihood'][0]

                self.assertEqual(expected, actual)
示例#16
0
文件: anomaly.py 项目: rreymer/nupic
    def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
        """
        Configure how anomaly scores are computed and post-processed.

        @param slidingWindowSize (optional) - how many elements are summed up;
            enables moving average on final anomaly score; int > 0, or None
            to disable the moving average
        @param mode (optional) - (string) how to compute anomaly;
            possible values are:
              - "pure" - the default, how much anomal the value is;
                  float 0..1 where 1=totally unexpected
              - "likelihood" - uses the anomaly_likelihood code;
                  models probability of receiving this value and anomalyScore
              - "weighted" - "pure" anomaly weighted by "likelihood"
                  (anomaly * likelihood)
        @raises TypeError if slidingWindowSize is given but is not > 0
        @raises ValueError if mode is not one of the supported modes
        """
        self._mode = mode
        # Test for None explicitly: ordering None against an int only works
        # by accident on Python 2 and raises TypeError on Python 3.
        self._useMovingAverage = (slidingWindowSize is not None
                                  and slidingWindowSize > 0)
        self._buf = None
        self._i = None

        # Using cumulative anomaly, sliding window
        if self._useMovingAverage:
            self._windowSize = slidingWindowSize
            # Sliding window buffer. The builtin float is used because the
            # numpy.float alias no longer exists in current NumPy releases.
            self._buf = numpy.zeros(self._windowSize, dtype=float)
            self._i = 0  # index pointer to actual position
        elif slidingWindowSize is not None:
            raise TypeError(
                "Anomaly: if you define slidingWindowSize, it has to be an "
                "integer > 0;  slidingWindowSize=%r" % (slidingWindowSize,))

        if self._mode not in Anomaly._supportedModes:
            raise ValueError("Invalid anomaly mode; only supported modes are: "
                             "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                             "Anomaly.MODE_WEIGHTED; you used: %r" %
                             (self._mode,))

        # "weighted" is documented as anomaly * likelihood, so it needs the
        # likelihood helper just like "likelihood" does. The attribute is
        # always defined so readers never hit AttributeError.
        if self._mode in (Anomaly.MODE_LIKELIHOOD, Anomaly.MODE_WEIGHTED):
            self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
        else:
            self._likelihood = None
示例#17
0
def foreach_batch_function(df, epoch_id):
    """Score every row of one batch and report likely anomalies.

    Each row is reduced to its timestamp and a normalized log level, run
    through the module-level ``model``, and its anomaly score is appended to
    the module-level ``record_anomalies`` history. A likelihood is derived
    from how far the mean of the last ``history`` scores is from the overall
    mean; a value of 0.85 or more is reported as an anomaly.

    :param df: batch object whose ``collect()`` returns rows exposing
        ``'level'`` and ``'@timestamp'``.
    :param epoch_id: batch identifier supplied by the caller; not used.
    """
    # ``nb`` is rebound below; without this declaration Python treats it as a
    # local and the first increment raises UnboundLocalError.
    global nb

    rows = df.collect()
    print("Size of Batch")
    print(len(rows))

    # Exact spellings only: any other level, including mixed case, is
    # treated as a warning.
    known_levels = {
        'INFO': 'info',
        'info': 'info',
        'ERROR': 'error',
        'error': 'error',
    }

    for row in rows:
        nb = nb + 1
        timestamp = row['@timestamp']
        level = known_levels.get(row['level'].encode("utf-8"), 'warning')

        record = {"timestamp": timestamp, "level": level}
        print(record)
        result = model.run(record)
        anom = result.inferences['anomalyScore']
        record_anomalies.append(anom)

        mean_anomalies = np.mean(record_anomalies)
        std_anomalies = np.std(record_anomalies)
        if std_anomalies == 0:
            # Avoid dividing by zero while all scores are still identical.
            std_anomalies = 0.00001
        mean_anomalies_short_window = np.mean(
            record_anomalies[-int(history):])

        likelihood_test_test = 1 - qfunction(
            (mean_anomalies_short_window - mean_anomalies) / std_anomalies)
        print("Likelihood")
        print(likelihood_test_test)

        # NOTE(review): a fresh AnomalyLikelihood is created for every record
        # and its result is never used, so it carries no history between
        # records. Confirm whether one shared instance was intended.
        anomalyLikelihood = AnomalyLikelihood()
        anomalyProbability = anomalyLikelihood.anomalyProbability(
            record['level'], anom, record['timestamp'])

        # NOTE(review): presumably matplotlib; an animation is created and
        # plt.show() is called once per record. Verify that the x/y keywords
        # are accepted and that blocking here is intended.
        ani = animation.FuncAnimation(fig,
                                      animate,
                                      interval=1000,
                                      x=nb,
                                      y=likelihood_test_test)
        plt.show()

        if likelihood_test_test >= 0.85:
            print("Anomaly detected!")
            print("Probability od being abnormal %s" %
                  (likelihood_test_test,))
示例#18
0
def _scoreAndWriteRecord(model, anomalyLikelihood, csvWriter, inputData):
    """Score one record with the model, append its CSV row and return its
    anomaly likelihood."""
    # Send it to the CLA and get back the raw anomaly score
    result = model.run(inputData)
    anomalyScore = result.inferences['anomalyScore']

    # Compute the Anomaly Likelihood
    likelihood = anomalyLikelihood.anomalyProbability(
        inputData["value"], anomalyScore, inputData["dttm"])
    logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

    csvWriter.writerow([
        inputData["dttm"], inputData["value"], anomalyScore, likelihood,
        logLikelihood
    ])
    return likelihood


def runAnomaly(options):
    """
    Create and run a CLA Model on the given dataset (based on the hotgym
    anomaly client in NuPIC).

    Model parameters are read from ``model_params.json``. Records come from
    ``options.inputFile`` (CSV with "dttm" and "value" columns) or, when that
    is empty, from the counts collected under ``options.oswpsDir``. Unless
    ``options.use_rtm`` selects the regression-based analysis, one row per
    record is written to ``options.outputFile``.

    Side effects: rebinds the module globals ``g_ps_count_dict_unsorted`` and
    ``g_abnomal_data_dict_unsorted``, and sets ``options.max`` /
    ``options.min`` when ``options.oswpsDir`` is given.

    :param options: parsed CLI options (oswpsDir, inputFile, outputFile,
        resolution, min, max, use_rtm).
    """
    global g_ps_count_dict_unsorted
    global g_abnomal_data_dict_unsorted

    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    if options.oswpsDir != "":
        # Get PS dictionary
        osw = OSWData(options.oswpsDir, PS)
        osw.traverse_dir()
        g_ps_count_dict_unsorted = osw.get_ps_dict()
        options.max = ps_max_value = max(g_ps_count_dict_unsorted.values())
        options.min = ps_min_value = min(g_ps_count_dict_unsorted.values())
        print("Min value:" + str(ps_min_value) + ', ' + "Max value:" +
              str(ps_max_value))

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = sensorParams['encoders']['value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print("Using resolution value: {0}".format(resolution))
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})

    # Here the log likelihood value is written as the 'anomaly score';
    # the actual CLA outputs are labeled 'raw anomaly score'.
    csvHeader = [
        "timestamp", "value", "_raw_score", "likelihood_score",
        "log_likelihood_score"
    ]

    if options.inputFile != "":
        # Both files are closed on exit, even on error. Binary mode for the
        # output is kept as-is: it is what the Python 2 csv module expects.
        with open(options.inputFile) as fin, \
                open(options.outputFile, "wb") as fout:
            reader = csv.reader(fin)
            csvWriter = csv.writer(fout)
            csvWriter.writerow(csvHeader)
            headers = next(reader)

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record in the CSV file
            print("Starting processing at %s" % datetime.datetime.now())
            for i, record in enumerate(reader, start=1):
                # Convert input data to a dict so we can pass it into the
                # model
                inputData = dict(zip(headers, record))
                inputData["value"] = float(inputData["value"])
                inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])

                likelihood = _scoreAndWriteRecord(model, anomalyLikelihood,
                                                  csvWriter, inputData)
                if likelihood > 0.9999:
                    print("Anomaly detected: %s %s %s" %
                          (inputData['dttm'], inputData['value'], likelihood))

                # Progress report
                if (i % 1000) == 0:
                    print("%s records processed" % i)
    elif options.oswpsDir != "":
        if options.use_rtm == True:
            rtm_sensitivity = 2
            rtm = LinearRegressionTemoporalMemory(window=10,
                                                  interval=10,
                                                  min_=options.min,
                                                  max_=options.max,
                                                  boost=rtm_sensitivity,
                                                  leak_detection=0,
                                                  critical_region="right_tail",
                                                  debug=0)
            g_abnomal_data_dict_unsorted = rtm.analyze(
                g_ps_count_dict_unsorted)
        else:
            with open(options.outputFile, "wb") as fout:
                csvWriter = csv.writer(fout)
                csvWriter.writerow(csvHeader)

                # The anomaly likelihood object
                anomalyLikelihood = AnomalyLikelihood()

                # Counting from 1 and starting at 0 keeps the final report
                # correct, and defined, even when there are no records.
                processed = 0
                print("Starting processing at %s" % datetime.datetime.now())
                orderedCounts = sorted(g_ps_count_dict_unsorted.items())
                for processed, (timestamp, ps_count) in enumerate(
                        orderedCounts, start=1):
                    inputData = {
                        "value": float(ps_count),
                        "dttm": dateutil.parser.parse(timestamp),
                    }

                    likelihood = _scoreAndWriteRecord(
                        model, anomalyLikelihood, csvWriter, inputData)
                    if likelihood > 0.9999:
                        print("Anomaly detected: %s %s %s" %
                              (inputData['dttm'], inputData['value'],
                               likelihood))
                        g_abnomal_data_dict_unsorted[timestamp] = ps_count

                    # Progress report
                    if (processed % 1000) == 0:
                        print("%s records processed" % processed)

                print("Completed processing %s records at %s" %
                      (processed, datetime.datetime.now()))
    print("Anomaly scores for %s have been written to %s" %
          (options.inputFile, options.outputFile))
示例#19
0
def run_model(model, a, b, save=True, aggregate=False, string=''):
    """Run the HTM model over a slice of the signal and collect anomaly metrics.

    For every sample the raw anomaly score, the anomaly likelihood, the log
    likelihood and the 1-step / 5-step best predictions are recorded and a
    progress line is printed.

    Arguments:
        :model: the model created with create_model().
        :a: the beginning of the analyzed signal.
        :b: the end of the analyzed signal.
        :save: if True, the anomaly and prediction lists are saved as .txt.
        :aggregate: if True, the signal is taken from aggregate_(a, b);
            otherwise column 1 of rows a..b of open_signs() is used.
        :string: the string to differentiate the name of the saved .txt files.
    """
    # ---- obtain the signal -------------------------------------------------
    if aggregate == True:
        # The second value returned by aggregate_ is not needed here.
        signal, _time_vect = aggregate_(a, b)
        print("the size of signal is: {i}".format(i=np.size(signal)))
    else:
        signal = open_signs()[a:b, 1]

    # ---- per-sample anomaly metrics ---------------------------------------
    raw_scores = []
    likelihoods = []
    log_likelihoods = []

    # ---- predictions ------------------------------------------------------
    # A k-step prediction made at sample n refers to sample n + k, so the
    # lists are padded with k zeros: index n then holds the prediction that
    # was made *for* sample n. Each list therefore ends up k entries longer
    # than the signal.
    one_step = [0]
    five_step = [0] * 5

    estimator = AnomalyLikelihood(learningPeriod=300)

    for step, sample in enumerate(signal):
        # The model only accepts records as a dict keyed by field name.
        result = model.run({'c1': float(sample)})

        raw = result.inferences["anomalyScore"]
        probability = estimator.anomalyProbability(sample,
                                                   raw,
                                                   timestamp=None)
        log_probability = estimator.computeLogLikelihood(probability)

        best = result.inferences["multiStepBestPredictions"]
        one_step.append(best[1])
        five_step.append(best[5])

        raw_scores.append(raw)
        likelihoods.append(probability)
        log_likelihoods.append(log_probability)

        # Debug trace: the current input next to what was predicted for it.
        print(
            'prediction of [{0}]:(input) {1:8} (1-step) {2:8} (5-step) {3:8}'
            .format(step, sample, one_step[step], five_step[step]))

    # ---- persist the results ----------------------------------------------
    if save == True:
        # "string" distinguishes e.g. training output from online-learning
        # output; the file-name prefixes are kept exactly as before.
        outputs = (
            ("anom_score_", raw_scores),
            ("anom_likelihood_", likelihoods),
            ("anom_logscore_", log_likelihoods),
            ("anom_prediction_1", one_step),
            ("anom_prediction_5", five_step),
        )
        for prefix, series in outputs:
            np.savetxt(prefix + string + ".txt", series, delimiter=',')
def runAnomaly(options):
    """
  Create and run a CLA Model on the given dataset (based on the hotgym anomaly
  client in NuPIC).
  """
    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print "Using resolution value: {0}".format(resolution)
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})
    with open(options.inputFile) as fin:

        # Open file and setup headers
        # Here we write the log likelihood value as the 'anomaly score'
        # The actual CLA outputs are labeled 'raw anomaly score'
        reader = csv.reader(fin)
        csvWriter = csv.writer(open(options.outputFile, "wb"))
        csvWriter.writerow([
            "timestamp", "value", "_raw_score", "likelihood_score",
            "log_likelihood_score"
        ])
        headers = reader.next()

        # The anomaly likelihood object
        anomalyLikelihood = AnomalyLikelihood()

        # Iterate through each record in the CSV file
        print "Starting processing at", datetime.datetime.now()
        for i, record in enumerate(reader, start=1):

            # Convert input data to a dict so we can pass it into the model
            inputData = dict(zip(headers, record))
            inputData["value"] = float(inputData["value"])
            inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
            #inputData["dttm"] = datetime.datetime.now()

            # Send it to the CLA and get back the raw anomaly score
            result = model.run(inputData)
            anomalyScore = result.inferences['anomalyScore']

            # Compute the Anomaly Likelihood
            likelihood = anomalyLikelihood.anomalyProbability(
                inputData["value"], anomalyScore, inputData["dttm"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
            if likelihood > 0.9999:
                print "Anomaly detected:", inputData['dttm'], inputData[
                    'value'], likelihood

            # Write results to the output CSV file
            csvWriter.writerow([
                inputData["dttm"], inputData["value"], anomalyScore,
                likelihood, logLikelihood
            ])

            # Progress report
            if (i % 1000) == 0: print i, "records processed"

    print "Completed processing", i, "records at", datetime.datetime.now()
    print "Anomaly scores for", options.inputFile,
    print "have been written to", options.outputFile
示例#21
0
    def __init__(self, config):
        """Set up the NuPIC model, logging and Redis metadata for one stream.

        :param config: mapping providing the keys 'resolution', 'stream',
            'seconds_per_request', 'webhook', 'anomaly_threshold',
            'likelihood_threshold' and 'domain'. config['stream'] must expose
            the attributes ``id``, ``name``, ``value_label`` and
            ``value_unit``.
        :raises KeyError: if a required config key or the LOG_DIR environment
            variable is missing.
        """

        # Instantiate NuPIC model.
        # NOTE: this writes into base_model_params.MODEL_PARAMS itself (no
        # copy is made), so the resolution set here is visible to every other
        # user of that dict.
        model_params = base_model_params.MODEL_PARAMS
        model_params['modelParams']['sensorParams']['encoders']['value'][
            'resolution'] = config['resolution']

        self.model = ModelFactory.create(model_params)

        self.model.enableInference({'predictedField': 'value'})

        # The shifter is used to bring the predictions to the actual time frame
        self.shifter = InferenceShifter()

        # The anomaly likelihood object
        self.anomalyLikelihood = AnomalyLikelihood()

        # Set stream source
        self.stream = config['stream']

        # Setup class variables
        self.db = redis.Redis('localhost')
        self.seconds_per_request = config['seconds_per_request']
        self.webhook = config['webhook']
        self.anomaly_threshold = config['anomaly_threshold']
        self.likelihood_threshold = config['likelihood_threshold']
        self.domain = config['domain']
        self.alert = False  # Toggled when we get above threshold

        # Setup logging.
        # NOTE(review): `logger` is not a parameter of this method, so it must
        # be a module-level name; if none exists this line raises NameError --
        # confirm against the rest of the module.
        self.logger = logger or logging.getLogger(__name__)
        # One rotating log file per stream: rotate at 1 MiB, keep 4 backups.
        # NOTE(review): a new handler is attached on every construction; if
        # the logger object is shared between instances, each record is
        # written to every instance's file.
        log_path = os.path.join(os.environ['LOG_DIR'],
                                "monitor_%s.log" % self.stream.name)
        handler = logging.handlers.RotatingFileHandler(
            log_path,
            maxBytes=1024 * 1024,
            backupCount=4,
        )

        handler.setFormatter(
            logging.Formatter(
                '[%(levelname)s/%(processName)s][%(asctime)s] %(name)s %(message)s'
            ))
        handler.setLevel(logging.INFO)
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

        self.logger.info("=== Settings ===")
        self.logger.info("Webhook: %s", self.webhook)
        self.logger.info("Domain: %s", self.domain)
        self.logger.info("Seconds per request: %d", self.seconds_per_request)

        # Write the stream metadata to Redis under the keys 'name:<id>',
        # 'value_label:<id>' and 'value_unit:<id>'. This is best-effort: a
        # failure is logged with its traceback and construction continues.
        try:
            self.db.set('name:%s' % self.stream.id, self.stream.name)
            self.db.set('value_label:%s' % self.stream.id,
                        self.stream.value_label)
            self.db.set('value_unit:%s' % self.stream.id,
                        self.stream.value_unit)
        except Exception:
            # Logger.warn is a deprecated alias; warning() is the supported
            # spelling and behaves the same.
            self.logger.warning("Could not write results to redis.",
                                exc_info=True)
# Command-line interface: a single optional --algo argument.
# NOTE: parse_args() runs as soon as this module-level code executes and reads
# the arguments of the running process.
parser = argparse.ArgumentParser(description='Add to existing name')
parser.add_argument(
    '--algo',
    help='add to existing name especially if I am testing some new feature.')
args = parser.parse_args()
# --algo has no default and is not marked required, so algo is None when the
# flag is omitted.
algo = args.algo


def get_all_files_path(root):
    """Return the path of every file below ``root``, searched recursively.

    Paths are built by joining each directory reported by ``os.walk`` with
    the file names it contains, in the order ``os.walk`` yields them. A
    ``root`` that does not exist produces an empty list.
    """
    collected = []
    for directory, _subdirs, filenames in os.walk(root):
        for filename in filenames:
            collected.append(os.path.join(directory, filename))
    return collected


# Rewrite every result file under results/<algo>: the 'anomaly_score' column
# is replaced by the anomaly likelihood computed from each row's value,
# anomaly score and timestamp. Files whose path contains '_score' are skipped.
files = get_all_files_path('results/' + algo)
for f in files:
    if '_score' in f:
        continue
    print(f)
    df = pd.read_csv(f)
    # A fresh likelihood estimator per file, fed with the rows in file order.
    al = AnomalyLikelihood()
    a = [
        al.anomalyProbability(value, score, timestamp)
        for value, score, timestamp in zip(df.value.values,
                                           df.anomaly_score.values,
                                           df.timestamp.values)
    ]
    df['anomaly_score'] = a
    # The input file is overwritten in place.
    df.to_csv(f, index=False)
示例#23
0
    predictedSegmentDecrement=
    0.0004,  #punishment for SEGMENTS for incorrect predictions
    #from nupic documentation: predictedSegmentDecrement: A good value is just a bit larger than (the column-level sparsity * permanenceIncrement)...
    #So, if column-level sparsity is 2% and permanenceIncrement is 0.01, this parameter should be something like 4% * 0.01 = 0.0004).
    seed=1960,
    maxSegmentPerCell=255,
    maxSynapsesPerSegment=255)

############## ANOMALY DETECTIONS #############

# Zero-initialised buffers with N_DATA + 1 entries each; presumably filled
# with the anomaly score and its log score further down -- TODO confirm.
anom_score = np.zeros((N_DATA + 1, ))
anom_logscore = np.zeros((N_DATA + 1, ))

# Anomaly helper configured with a sliding window of 25.
anomaly_score = Anomaly(slidingWindowSize=25)

anomaly_likelihood = AnomalyLikelihood(learningPeriod=500,
                                       historicWindowSize=213)

# NOTE(review): the purpose of dd is not visible in this part of the script.
dd = 0

# Each "linha" (Portuguese for "row") of teste is indexed as
# [0] -> input of the time encoder, [1] -> input of the scalar encoder.
for i, linha in enumerate(teste):

    ############ encode the current row ############
    # presumably encodeIntoArray fills the given bit array in place -- verify
    # against the encoder API.

    scalar_encoder.encodeIntoArray(linha[1], bits_scalar)
    time_encoder.encodeIntoArray(linha[0], bits_time)

    # Combined encoding: the time bits come first, then the scalar bits.
    encoder_output = np.concatenate((bits_time, bits_scalar))

    ############ per-row output buffer ############

    # A new all-zero array with N_COLUMNS entries is created for every row.
    sdr_output = np.zeros(N_COLUMNS)
示例#24
0
# FIFO
events = reversed(events)

if PREDICT: import PREDICTmodel_params as model_params

else: import model_params as model_params

if LOAD: model = ModelFactory.loadFromCheckpoint(MODELSTATE)
else: model = ModelFactory.create(model_params.MODEL_PARAMS)
if VISUALIZE: Patcher().patchCLAModel(model)
model.enableInference({"predictedField": "event"})
print "Model created!\n"

# Get the Model-Classes:
anomalyLikelihood = AnomalyLikelihood()
if PREDICT:
    from nupic.data.inference_shifter import InferenceShifter
    shifter = InferenceShifter()

if (WINDOWSIZE != None):
    AnomalyScores = deque(numpy.ones(WINDOWSIZE), maxlen=WINDOWSIZE)
else:
    AnomalyScores = deque()  # numpy.ones(len(events)), maxlen=len(events) ?
LikelihoodScores = deque()

r = redis.Redis(host=os.environ.get("REDIS_HOST", "127.0.0.1"),
                port=int(os.environ.get("REDIS_PORT", 6379)),
                db=int(os.environ.get("REDIS_DB", 0)))

# Feed data into the model:
示例#25
0
def runAnomaly(options):

    #define local params :

    inputArray = []  #holds all input data
    anomalyArray = []  #holds all output data
    inputThreshold = float(10)  #how many percent of intial samples to ignore
    anomCounter = 0  #counts number of anomalies

    [timeDataFinal, yvalues
     ] = interpolateFunction(inputFileNameInterpol,
                             inputFileNameLocal)  #interpolate the function

    with open("model_params.json") as fp:
        modelParams = json.load(fp)
        #pprint(modelParams)

    #JSON handling
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    #print numBuckets
    resolution = options.resolution

    #f**k is resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
        print "Using resolution value: {0}".format(resolution)
    sensorParams['encoders']['value']['resolution'] = resolution
    #print resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})
    with open(options.inputFile) as fin:

        #Open files
        #Setup headers
        reader = csv.reader(fin)
        headers = reader.next()

        # The anomaly likelihood object
        anomalyLikelihood = AnomalyLikelihood()

        #Iterate through each record in the CSV
        print "Starting processing at", datetime.datetime.now()
        for i, record in enumerate(reader, start=1):

            # Convert input data to a dict so we can pass it into the model
            inputData = dict(zip(headers, record))
            #print(inputData)
            inputData["value"] = float(inputData["value"])
            inputArray.append(inputData["value"])
            inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])
            #print inputData

            # Send it to the CLA and get back the raw anomaly score
            result = model.run(inputData)

            #inferences call from nupic
            anomalyScore = result.inferences['anomalyScore']
            anomalyArray.append(anomalyScore)

            #comput likelihood - nupic call
            likelihood = anomalyLikelihood.anomalyProbability(
                inputData["value"], anomalyScore, inputData["dttm"])

        myPlotFunction(inputArray, anomalyArray,
                       inputThreshold)  #plot the output

        #print file
        interpolBool = False
        writeFunction(outputFileName, timeDataFinal, anomalyArray,
                      interpolBool)