def __init__(self, slidingWindowSize=None, mode=MODE_PURE,
             binaryAnomalyThreshold=None):
  """
  @param slidingWindowSize (optional) - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0
  @param mode (optional) - (string) how to compute anomaly;
      possible values are:
        - "pure" - the default, how anomalous the value is;
            float 0..1 where 1=totally unexpected
        - "likelihood" - uses the anomaly_likelihood code;
            models probability of receiving this value and anomalyScore
        - "weighted" - "pure" anomaly weighted by "likelihood"
            (anomaly * likelihood)
  @param binaryAnomalyThreshold (optional) - if set [0,1] the anomaly score
      will be discretized to 1/0 (1 if >= binaryAnomalyThreshold). The
      transformation is applied after the moving average is computed and
      updated.
  """
  self._mode = mode
  if slidingWindowSize is not None:
    self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
  else:
    self._movingAverage = None

  if (self._mode == Anomaly.MODE_LIKELIHOOD or
      self._mode == Anomaly.MODE_WEIGHTED):
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly

  if self._mode not in Anomaly._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

  if binaryAnomalyThreshold is not None and (
      not isinstance(binaryAnomalyThreshold, float) or
      binaryAnomalyThreshold >= 1.0 or
      binaryAnomalyThreshold <= 0.0):
    raise ValueError("Anomaly: binaryAnomalyThreshold must be from (0,1) "
                     "or None if disabled.")
  self._binaryThreshold = binaryAnomalyThreshold
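# Usage sketch (not from the original source): exercising the Anomaly class
# above in likelihood mode. Anomaly.compute() takes active and predicted
# column indices; the index lists and input value here are illustrative only.
from nupic.algorithms.anomaly import Anomaly

anomaly = Anomaly(slidingWindowSize=5, mode=Anomaly.MODE_LIKELIHOOD)
score = anomaly.compute(activeColumns=[2, 3, 6],
                        predictedColumns=[3, 5, 7],
                        inputValue=42.0)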
def compute_scores(y_test, y_pred, normalize=False):
    # Squared errors between observed and predicted values.
    errors = np.array((y_test - y_pred) ** 2)
    if normalize:
        errors = errors / float(errors.max() - errors.min())

    # Log likelihood.
    log_likelihoods = []
    anomaly_likelihood = AnomalyLikelihood()
    for i in range(len(y_test)):
        likelihood = anomaly_likelihood.anomalyProbability(y_test[i],
                                                           errors[i],
                                                           timestamp=None)
        log_likelihood = anomaly_likelihood.computeLogLikelihood(likelihood)
        log_likelihoods.append(log_likelihood)

    # Anomaly thresholds:
    # - HIGH: log_likelihood >= 0.5
    # - MEDIUM: 0.5 > log_likelihood >= 0.4
    N = len(log_likelihoods)
    anomalies = {'high': np.zeros(N), 'medium': np.zeros(N)}
    x = np.array(log_likelihoods)
    high_idx = x >= 0.5
    anomalies['high'][high_idx] = 1
    # medium_idx = np.logical_and(x >= 0.4, x < 0.5)
    # anomalies['medium'][medium_idx] = 1

    return errors, log_likelihoods, anomalies
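# Hypothetical invocation of compute_scores() above; the arrays are
# illustrative. Note that AnomalyLikelihood needs a few hundred records
# (its learning period) before the probabilities become meaningful, so on
# tiny inputs like this the log likelihoods stay near their initial values.
import numpy as np

y_test = np.array([10.0, 11.0, 30.0, 12.0])
y_pred = np.array([10.5, 11.2, 12.0, 12.1])
errors, log_likelihoods, anomalies = compute_scores(y_test, y_pred,
                                                    normalize=True)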
def __init__(self, slidingWindowSize=None, mode=MODE_PURE,
             binaryAnomalyThreshold=None):
  self._mode = mode
  if slidingWindowSize is not None:
    self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
  else:
    self._movingAverage = None

  if (self._mode == Anomaly.MODE_LIKELIHOOD or
      self._mode == Anomaly.MODE_WEIGHTED):
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly
  else:
    self._likelihood = None

  if self._mode not in self._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)

  self._binaryThreshold = binaryAnomalyThreshold
  if binaryAnomalyThreshold is not None and (
      not isinstance(binaryAnomalyThreshold, float) or
      binaryAnomalyThreshold >= 1.0 or
      binaryAnomalyThreshold <= 0.0):
    raise ValueError(
        "Anomaly: binaryAnomalyThreshold must be from (0,1) "
        "or None if disabled.")
def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
  """
  @param slidingWindowSize (optional) - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0
  @param mode (optional) - (string) how to compute anomaly;
      possible values are:
        - "pure" - the default, how anomalous the value is;
            float 0..1 where 1=totally unexpected
        - "likelihood" - uses the anomaly_likelihood code;
            models probability of receiving this value and anomalyScore
        - "weighted" - "pure" anomaly weighted by "likelihood"
            (anomaly * likelihood)
  """
  self._mode = mode
  if slidingWindowSize is not None:
    self._movingAverage = MovingAverage(windowSize=slidingWindowSize)
  else:
    self._movingAverage = None

  if (self._mode == Anomaly.MODE_LIKELIHOOD or
      self._mode == Anomaly.MODE_WEIGHTED):
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly

  if self._mode not in Anomaly._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
def runAvogadroAnomaly(metric, options):
    """
    Create a new HTM Model, fetch the data from the local DB, process it in
    NuPIC, and save the results to a new CSV output file.

    :param metric: AvogadroAgent metric class
    :param options: CLI Options
    """
    model = createModel(metric)
    model.enableInference({"predictedField": metric.name})

    fetched = metric.fetch(prefix=options.prefix, start=None)

    resultFile = open(os.path.join(options.prefix,
                                   metric.name + "-result.csv"), "wb")
    csvWriter = csv.writer(resultFile)
    csvWriter.writerow(["timestamp", metric.name, "raw_anomaly_score",
                        "anomaly_likelihood", "color"])

    headers = ("timestamp", metric.name)

    anomalyLikelihood = AnomalyLikelihood()

    for (ts, value) in fetched:
        try:
            value = float(value)
        except (ValueError, TypeError):
            continue

        if not math.isnan(value):
            modelInput = dict(zip(headers, (ts, value)))
            modelInput[metric.name] = float(value)
            modelInput["timestamp"] = datetime.datetime.fromtimestamp(
                float(modelInput["timestamp"]))
            result = model.run(modelInput)
            anomalyScore = result.inferences["anomalyScore"]

            likelihood = anomalyLikelihood.anomalyProbability(
                modelInput[metric.name], anomalyScore,
                modelInput["timestamp"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)

            if logLikelihood > .5:
                color = "red"
            elif logLikelihood > .4:  # the <= .5 upper bound is implied
                color = "yellow"
            else:
                color = "green"

            csvWriter.writerow([modelInput["timestamp"], float(value),
                                anomalyScore, logLikelihood, color])
        else:
            resultFile.flush()
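# Side note (sketch, not in the original source): NuPIC's
# computeLogLikelihood() is log(1.0000000001 - likelihood) / log(1e-10),
# so the color thresholds above invert to raw likelihoods of roughly
# 0.99999 (red, logLikelihood 0.5) and 0.9999 (yellow, logLikelihood 0.4).
import math

def likelihoodFromLog(logLikelihood):
    # Invert the log transform; the constant mirrors NuPIC's formula.
    return 1.0000000001 - math.pow(10.0, -10.0 * logLikelihood)

print(likelihoodFromLog(0.5))  # ~0.99999
print(likelihoodFromLog(0.4))  # ~0.9999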
def get_anomaly_likelihood_calc(self, metric,
                                models_number_below_configured_limit):
    anomaly_likelihood_calc = None
    if not self.__loaded_models.anomaly_calc_exists(metric["metric_name"]):
        anomaly_likelihood_calculators_path = \
            self.__model_storage_manager.get_save_path(
                metric["metric_name"],
                path_element="anomaly_likelihood_calculator")
        calc_file_exists = os.path.isfile(os.path.join(
            anomaly_likelihood_calculators_path,
            self._anomaly_likelihood_calculator_filename))
        # The limit check previously appeared three more times inside the
        # branches below; it only needs to guard them once.
        if models_number_below_configured_limit:
            if calc_file_exists:
                try:
                    self.__loaded_models.add_anomaly_calc_for_metric(
                        metric["metric_name"],
                        self.__anomaly_likelihood_calculator_factory.
                        create_anomaly_likelihood_calc_from_disk(metric))
                    self._logger.debug(
                        "get_anomaly_likelihood_calc",
                        "LOADED ANOMALY_LIKELIHOOD_CALC FROM FILE",
                        metric=str(metric["metric_name"]))
                except Exception as ex:
                    # Fall back to a fresh calculator if loading fails.
                    self.__loaded_models.add_anomaly_calc_for_metric(
                        metric["metric_name"], AnomalyLikelihood())
                    self._logger.warn(
                        "get_anomaly_likelihood_calc",
                        "Failed to create an anomaly likelihood calc "
                        "from disk",
                        metric=str(metric["metric_name"]),
                        exception_type=str(type(ex).__name__),
                        exception_message=str(ex))
            else:
                self.__loaded_models.add_anomaly_calc_for_metric(
                    metric["metric_name"], AnomalyLikelihood())

    if self.__loaded_models.anomaly_calc_exists(metric["metric_name"]):
        anomaly_likelihood_calc = self.__loaded_models.get_anomaly_calc(
            metric["metric_name"])
    return anomaly_likelihood_calc
def __init__(self,
             learningPeriod=288,
             estimationSamples=100,
             historicWindowSize=8640,
             reestimationPeriod=100):
    self.anomalyLikelihood = AnomalyLikelihood(
        learningPeriod=learningPeriod,
        estimationSamples=estimationSamples,
        historicWindowSize=historicWindowSize,
        reestimationPeriod=reestimationPeriod)
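# Equivalent direct construction (sketch): the wrapper above simply forwards
# its arguments to AnomalyLikelihood, so this is interchangeable with it.
# The metric value and raw score passed below are illustrative.
from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood

likelihood = AnomalyLikelihood(learningPeriod=288,
                               estimationSamples=100,
                               historicWindowSize=8640,
                               reestimationPeriod=100)
probability = likelihood.anomalyProbability(22.5, 0.3)  # value, raw score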
def __init__(self):
    # self.model_params = getScalarMetricWithTimeOfDayAnomalyParams(
    #     metricData=[0], tmImplementation="cpp")
    with open("model_params.json") as fp:
        self.model_params = json.load(fp)
    print self.model_params

    self.newmodel = ModelFactory.create(self.model_params)
    self.newmodel.enableLearning()
    self.newmodel.enableInference({"predictedField": "value"})

    self.DATE_FORMAT = "%d/%m/%Y %H:%M"
    self.anomalylikelihood = AnomalyLikelihood()
def definir_AnomDetect(N_DATA):
    """
    Returns the anomaly-score and anomaly-likelihood objects, plus the
    arrays that will hold the anomaly score and the anomaly log-likelihood.
    """
    anom_score_txt = np.zeros((N_DATA + 1,))
    anom_logscore_txt = np.zeros((N_DATA + 1,))

    anomaly_score = Anomaly(slidingWindowSize=25)
    anomaly_likelihood = AnomalyLikelihood(learningPeriod=600,
                                           historicWindowSize=313)

    return anomaly_score, anomaly_likelihood, anom_score_txt, anom_logscore_txt
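# Hypothetical call of definir_AnomDetect() above for a 10000-sample run.
# The returned arrays have one extra slot (N_DATA + 1) so the score of the
# final record still has somewhere to go.
anomaly_score, anomaly_likelihood, anom_score_txt, anom_logscore_txt = \
    definir_AnomDetect(10000)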
def __init__(self, modelId, stats, replaceParams=()):
  """
  :param str modelId: model identifier
  :param dict stats: Metric data stats per stats_schema.json in the
    unicorn_backend package.
  :param sequence replaceParams: Parameter replacement PATH REPLACEMENT pairs
  """
  self._modelId = modelId

  self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

  self._model = self._createModel(stats=stats, replaceParams=replaceParams)

  self._anomalyLikelihood = AnomalyLikelihood()
def __init__(self, modelId, stats):
  """
  :param str modelId: model identifier
  :param dict stats: Metric data stats per stats_schema.json in the
    unicorn_backend package.
  """
  self._modelId = modelId

  # NOTE: ModelRecordEncoder is implemented in the pull request
  # https://github.com/numenta/nupic/pull/2432 that is not yet in master.
  self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=self._INPUT_RECORD_SCHEMA)

  self._model = self._createModel(stats=stats)

  self._anomalyLikelihood = AnomalyLikelihood()
def _send_predictions(self, metric_id, metric_envelope):
    if metric_id not in self._models:
        self._models[metric_id] = ModelFactory.create(self.model_params)
        self._models[metric_id].enableInference(
            {'predictedField': 'value'})
        self._shifters[metric_id] = InferenceShifter()
        self._anomaly_likelihood[metric_id] = AnomalyLikelihood()

    model = self._models[metric_id]
    shifter = self._shifters[metric_id]

    modelInput = {
        # 'dttm': value['metric']['timestamp'],
        'dttm': datetime.datetime.now(),
        'value': metric_envelope['metric']['value']
    }

    result = shifter.shift(model.run(modelInput))
    inferences = result.inferences
    inference = inferences['multiStepBestPredictions'][5]

    metric = metric_envelope['metric']
    metric_name = metric['name']

    if inference is not None:
        metric['name'] = metric_name + '.nupic.predicted'
        metric['value'] = inference
        str_value = simplejson.dumps(metric_envelope)
        self._producer.send_messages(self._topic, str_value)

    if 'anomalyScore' in inferences:
        metric['name'] = metric_name + '.nupic.anomaly_score'
        metric['value'] = inferences['anomalyScore']
        str_value = simplejson.dumps(metric_envelope)
        self._producer.send_messages(self._topic, str_value)

        anomalyLikelihood = self._anomaly_likelihood[metric_id]
        likelihood = anomalyLikelihood.anomalyProbability(
            modelInput['value'],
            inferences['anomalyScore'],
            datetime.datetime.now())

        metric['name'] = metric_name + '.nupic.anomaly_likelihood'
        metric['value'] = likelihood
        str_value = simplejson.dumps(metric_envelope)
        self._producer.send_messages(self._topic, str_value)
def __init__(self, inputFileObj, inputSpec, aggSpec, modelSpec):
  """
  :param inputFileObj: A file-like object that contains input metric data
  :param dict inputSpec: Input data specification per input_opt_schema.json
  :param dict aggSpec: Optional aggregation specification per
    agg_opt_schema.json or None if no aggregation is requested
  :param dict modelSpec: Model specification per model_opt_schema.json
  """
  self._inputSpec = inputSpec

  self._aggSpec = aggSpec

  self._modelSpec = modelSpec

  if "modelId" in modelSpec:
    self._modelId = modelSpec["modelId"]
  else:
    self._modelId = "Unknown"

  inputRecordSchema = (
      fieldmeta.FieldMetaInfo(modelSpec["timestampFieldName"],
                              fieldmeta.FieldMetaType.datetime,
                              fieldmeta.FieldMetaSpecial.timestamp),
      fieldmeta.FieldMetaInfo(modelSpec["valueFieldName"],
                              fieldmeta.FieldMetaType.float,
                              fieldmeta.FieldMetaSpecial.none),
  )

  self._aggregator = aggregator.Aggregator(
      aggregationInfo=dict(
          fields=([(modelSpec["valueFieldName"], aggSpec["func"])]
                  if aggSpec is not None else []),
          seconds=aggSpec["windowSize"] if aggSpec is not None else 0
      ),
      inputFields=inputRecordSchema)

  self._modelRecordEncoder = record_stream.ModelRecordEncoder(
      fields=inputRecordSchema)

  self._model = self._createModel(modelSpec=modelSpec)

  self._anomalyLikelihood = AnomalyLikelihood()

  self._csvReader = self._createCsvReader(inputFileObj)
def main():
    # cluster similar inputs together in SDR space
    s = SpatialPooler()
    print(type(s))

    # powerful sequence memory in SDR space
    t = TemporalMemory()
    print(type(t))

    # computes rolling Gaussian based on raw anomaly scores and then their
    # likelihood
    a = AnomalyLikelihood()
    print(type(a))

    # temporally groups active cell sets from TM
    u = UnionTemporalPooler()
    print(type(u))

    # learning pairings of Union representations and labeled classes
    c = SDRClassifier()
    print(type(c))
def testLikelihoodValues(self):
  """ Test that the region keeps track of state correctly and produces
      the same likelihoods as the AnomalyLikelihood module. """
  anomalyLikelihoodRegion = AnomalyLikelihoodRegion()
  anomalyLikelihood = AnomalyLikelihood()

  inputs = AnomalyLikelihoodRegion.getSpec()['inputs']
  outputs = AnomalyLikelihoodRegion.getSpec()['outputs']
  with open(_INPUT_DATA_FILE) as f:
    reader = csv.reader(f)
    reader.next()  # skip the header row
    for record in reader:
      consumption = float(record[1])
      anomalyScore = float(record[2])
      likelihood1 = anomalyLikelihood.anomalyProbability(
          consumption, anomalyScore)

      inputs['rawAnomalyScore'] = numpy.array([anomalyScore])
      inputs['metricValue'] = numpy.array([consumption])
      anomalyLikelihoodRegion.compute(inputs, outputs)
      likelihood2 = outputs['anomalyLikelihood'][0]

      self.assertEqual(likelihood1, likelihood2)
def __init__(self, slidingWindowSize=None, mode=MODE_PURE):
  """
  @param slidingWindowSize (optional) - how many elements are summed up;
      enables moving average on final anomaly score; int >= 0
  @param mode (optional) - (string) how to compute anomaly;
      possible values are:
        - "pure" - the default, how anomalous the value is;
            float 0..1 where 1=totally unexpected
        - "likelihood" - uses the anomaly_likelihood code;
            models probability of receiving this value and anomalyScore
        - "weighted" - "pure" anomaly weighted by "likelihood"
            (anomaly * likelihood)
  """
  self._mode = mode
  self._useMovingAverage = slidingWindowSize > 0
  self._buf = None
  self._i = None

  # Using cumulative anomaly, sliding window
  if self._useMovingAverage:
    self._windowSize = slidingWindowSize
    # Sliding window buffer
    self._buf = numpy.array([0] * self._windowSize, dtype=numpy.float)
    self._i = 0  # index pointer to actual position
  elif slidingWindowSize is not None:
    raise TypeError(
        "Anomaly: if you define slidingWindowSize, it has to be an "
        "integer > 0; slidingWindowSize=%r" % slidingWindowSize)

  if self._mode == Anomaly.MODE_LIKELIHOOD:
    self._likelihood = AnomalyLikelihood()  # probabilistic anomaly

  if self._mode not in Anomaly._supportedModes:
    raise ValueError("Invalid anomaly mode; only supported modes are: "
                     "Anomaly.MODE_PURE, Anomaly.MODE_LIKELIHOOD, "
                     "Anomaly.MODE_WEIGHTED; you used: %r" % self._mode)
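# Standalone illustration (not the class itself) of the ring-buffer moving
# average the constructor above sets up: _buf holds the last windowSize raw
# scores and _i points at the slot to overwrite next.
import numpy

buf = numpy.zeros(3)
i = 0
for raw in [1.0, 0.0, 1.0, 1.0]:
    buf[i] = raw
    i = (i + 1) % len(buf)   # wrap around, overwriting the oldest score
    print(buf.mean())        # the averaged anomaly score for this step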
def foreach_batch_function(df, epoch_id):
    # Transform and write batchDF.
    # NOTE: nb, model, record_anomalies, history, fig, animate and qfunction
    # are assumed to be defined at module level; without this declaration,
    # `nb = nb + 1` below raises UnboundLocalError.
    global nb
    row = df.collect()
    print "Size of Batch"
    print(len(row))
    if len(row) != 0:
        for x in range(len(row)):
            nb = nb + 1
            level = row[x]['level']
            timestamp = row[x]['@timestamp']
            level = level.encode("utf-8")
            if level == 'INFO' or level == 'info':
                level = 'info'
            elif level == 'ERROR' or level == 'error':
                level = 'error'
            else:
                level = 'warning'

            record = {"timestamp": timestamp, "level": level}
            print(record)
            result = model.run(record)
            anom = result.inferences['anomalyScore']
            record_anomalies.append(anom)

            # Rolling statistics over all raw anomaly scores seen so far.
            mean_anomalies = np.mean(record_anomalies)
            std_anomalies = np.std(record_anomalies)
            if std_anomalies == 0:
                std_anomalies = 0.00001
            var_anomalies = np.var(record_anomalies)
            mean_anomalies_short_window = np.mean(
                record_anomalies[-int(history):])

            # Hand-rolled likelihood estimates based on a Gaussian over the
            # recent scores (cf. the anomaly_likelihood algorithm).
            likelihood = 1 - (
                norm.cdf(anom, mean_anomalies_short_window - mean_anomalies,
                         std_anomalies) -
                norm.cdf(0, mean_anomalies_short_window - mean_anomalies,
                         std_anomalies))
            likelihood_test = 1 - (
                anom - (mean_anomalies_short_window - mean_anomalies)
            ) / std_anomalies
            likelihood_test_test = 1 - qfunction(
                (mean_anomalies_short_window - mean_anomalies) /
                std_anomalies)
            print "Likelihood"
            print(likelihood_test_test)

            # NOTE: instantiating AnomalyLikelihood inside the loop resets
            # its history on every record; it should be created once,
            # outside this function, for its probabilities to be meaningful.
            anomalyLikelihood = AnomalyLikelihood()
            anomalyProbability = anomalyLikelihood.anomalyProbability(
                record['level'], anom, record['timestamp'])

            # NOTE: FuncAnimation does not accept x/y keyword arguments;
            # extra values for the callback must go through fargs.
            ani = animation.FuncAnimation(fig, animate, interval=1000,
                                          fargs=(nb, likelihood_test_test))
            plt.show()

            if likelihood_test_test >= 0.85:
                print "Anomaly detected!"
                print "Probability of being abnormal", likelihood_test_test
                # (Commented-out experiment: count contiguous anomalous
                # regions before alerting.)
                #ibefore = i
                #if ibefore - iafter == 1:
                #    region = region + 1
                #    if region == 20:
                #        print i - 20
                #        print 'Anomaly detected!'
                #        print 'Probability of being abnormal', likelihood_test_test
                #        print 'Probability of being abnormal (nupic)', anomalyProbability
                #        region_anomaly = region_anomaly + 1
                #else:
                #    region = 0
                #iafter = ibefore
def runAnomaly(options):
    """
    Create and run a CLA Model on the given dataset (based on the hotgym
    anomaly client in NuPIC).
    """
    global g_ps_count_dict_unsorted
    global g_abnomal_data_dict_unsorted

    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    if options.oswpsDir != "":
        # Get PS dictionary
        osw = OSWData(options.oswpsDir, PS)
        osw.traverse_dir()
        g_ps_count_dict_unsorted = osw.get_ps_dict()
        options.max = ps_max_value = max(g_ps_count_dict_unsorted.values())
        options.min = ps_min_value = min(g_ps_count_dict_unsorted.values())
        print("Min value:" + str(ps_min_value) + ', ' +
              "Max value:" + str(ps_max_value))

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print("Using resolution value: {0}".format(resolution))
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})

    if options.inputFile != "":
        with open(options.inputFile) as fin:
            # Open file and setup headers
            # Here we write the log likelihood value as the 'anomaly score'
            # The actual CLA outputs are labeled 'raw anomaly score'
            reader = csv.reader(fin)
            csvWriter = csv.writer(open(options.outputFile, "wb"))
            csvWriter.writerow(["timestamp", "value", "_raw_score",
                                "likelihood_score", "log_likelihood_score"])
            headers = reader.next()

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record in the CSV file
            print "Starting processing at", datetime.datetime.now()
            for i, record in enumerate(reader, start=1):
                # Convert input data to a dict so we can pass it into the model
                inputData = dict(zip(headers, record))
                inputData["value"] = float(inputData["value"])
                inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(
                    likelihood)
                if likelihood > 0.9999:
                    print "Anomaly detected:", inputData['dttm'], \
                        inputData['value'], likelihood

                # Write results to the output CSV file
                csvWriter.writerow([inputData["dttm"], inputData["value"],
                                    anomalyScore, likelihood, logLikelihood])

                # Progress report
                if (i % 1000) == 0:
                    print i, "records processed"

    elif options.oswpsDir != "":
        if options.use_rtm:
            rtm_sensitivity = 2
            rtm = LinearRegressionTemoporalMemory(
                window=10, interval=10, min_=options.min, max_=options.max,
                boost=rtm_sensitivity, leak_detection=0,
                critical_region="right_tail", debug=0)
            g_abnomal_data_dict_unsorted = rtm.analyze(
                g_ps_count_dict_unsorted)
        else:
            csvWriter = csv.writer(open(options.outputFile, "wb"))
            csvWriter.writerow(["timestamp", "value", "_raw_score",
                                "likelihood_score", "log_likelihood_score"])
            ps_od = collections.OrderedDict(
                sorted(g_ps_count_dict_unsorted.items()))

            # The anomaly likelihood object
            anomalyLikelihood = AnomalyLikelihood()

            # Iterate through each record
            print "Starting processing at", datetime.datetime.now()
            for i, timestamp in enumerate(ps_od):
                ps_count = ps_od[timestamp]
                inputData = {}
                inputData["value"] = float(ps_count)
                inputData["dttm"] = dateutil.parser.parse(timestamp)

                # Send it to the CLA and get back the raw anomaly score
                result = model.run(inputData)
                anomalyScore = result.inferences['anomalyScore']

                # Compute the Anomaly Likelihood
                likelihood = anomalyLikelihood.anomalyProbability(
                    inputData["value"], anomalyScore, inputData["dttm"])
                logLikelihood = anomalyLikelihood.computeLogLikelihood(
                    likelihood)
                if likelihood > 0.9999:
                    print "Anomaly detected:", inputData['dttm'], \
                        inputData['value'], likelihood
                    g_abnomal_data_dict_unsorted[timestamp] = ps_count

                # Write results to the output CSV file
                csvWriter.writerow([inputData["dttm"], inputData["value"],
                                    anomalyScore, likelihood, logLikelihood])

                # Progress report
                if (i % 1000) == 0:
                    print i, "records processed"

    print "Completed processing", i, "records at", datetime.datetime.now()
    print "Anomaly scores for", options.inputFile,
    print "have been written to", options.outputFile
def run_model(model, a, b, save=True, aggregate=False, string=''):
    """Runs the HTM model and generates the anomaly scores.

    Arguments:
    :model: the model created with create_model().
    :a: the beginning of the analyzed signal.
    :b: the end of the analyzed signal.
    :save: if True then the anomalies output will be saved as .txt.
    :string: the string to differentiate the name of the saved .txt files.
    """
    # Open the signal.
    if aggregate == True:
        signal, time_vect = aggregate_(a, b)
        print("the size of signal is: {i}".format(i=np.size(signal)))
    else:
        signal = open_signs()
        signal = signal[a:b, 1]

    # Declare the anomaly lists.
    anom_scores = []
    anom_likelihood = []
    anom_loglikelihood = []

    # Declare the prediction lists. Predictions are always made one step
    # ahead, so the first predicted value corresponds to index 1 and there
    # is no prediction for index 0; likewise, after seeing the last signal
    # "A" the model predicts "A+1", which has no matching value in the
    # signal array.
    predictions_1 = []
    predictions_5 = []
    predictions_1.append(0)
    for i in range(5):
        predictions_5.append(0)

    # Declare the anomaly likelihood object.
    likelihood = AnomalyLikelihood(learningPeriod=300)

    # Iterate over each value in the signal array; the counter is used for
    # debugging purposes.
    for counter, value in enumerate(signal):
        # The model only accepts data in a specific dict format.
        inputRecords = {}
        inputRecords['c1'] = float(value)

        # Run the HTM model over the inputRecords dict.
        result = model.run(inputRecords)

        # Compute the anomaly likelihood and log-likelihood.
        current_likelihood = likelihood.anomalyProbability(
            value, result.inferences["anomalyScore"], timestamp=None)
        current_loglikelihood = likelihood.computeLogLikelihood(
            current_likelihood)

        # Obtain the predicted values from the inferences dict.
        bestPredictions = result.inferences["multiStepBestPredictions"]
        predictions_1.append(bestPredictions[1])
        predictions_5.append(bestPredictions[5])

        # Add the anomaly values to the respective lists.
        anom_scores.append(result.inferences["anomalyScore"])
        anom_likelihood.append(current_likelihood)
        anom_loglikelihood.append(current_loglikelihood)

        # Print the input and predictions, for debugging purposes.
        if counter % 1 == 0:
            print('prediction of [{0}]:(input) {1:8} (1-step) {2:8} '
                  '(5-step) {3:8}'.format(counter, value,
                                          predictions_1[counter],
                                          predictions_5[counter]))

    # Save the anomaly and prediction arrays; "string" differentiates the
    # training and online-learning outputs.
    if save == True:
        np.savetxt("anom_score_" + string + ".txt", anom_scores,
                   delimiter=',')
        np.savetxt("anom_likelihood_" + string + ".txt", anom_likelihood,
                   delimiter=',')
        np.savetxt("anom_logscore_" + string + ".txt", anom_loglikelihood,
                   delimiter=',')
        np.savetxt("anom_prediction_1" + string + ".txt", predictions_1,
                   delimiter=',')
        np.savetxt("anom_prediction_5" + string + ".txt", predictions_5,
                   delimiter=',')
def runAnomaly(options):
    """
    Create and run a CLA Model on the given dataset (based on the hotgym
    anomaly client in NuPIC).
    """
    # Load the model params JSON
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    # Update the resolution value for the encoder
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print "Using resolution value: {0}".format(resolution)
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})

    with open(options.inputFile) as fin:
        # Open file and setup headers
        # Here we write the log likelihood value as the 'anomaly score'
        # The actual CLA outputs are labeled 'raw anomaly score'
        reader = csv.reader(fin)
        csvWriter = csv.writer(open(options.outputFile, "wb"))
        csvWriter.writerow(["timestamp", "value", "_raw_score",
                            "likelihood_score", "log_likelihood_score"])
        headers = reader.next()

        # The anomaly likelihood object
        anomalyLikelihood = AnomalyLikelihood()

        # Iterate through each record in the CSV file
        print "Starting processing at", datetime.datetime.now()
        for i, record in enumerate(reader, start=1):
            # Convert input data to a dict so we can pass it into the model
            inputData = dict(zip(headers, record))
            inputData["value"] = float(inputData["value"])
            inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])

            # Send it to the CLA and get back the raw anomaly score
            result = model.run(inputData)
            anomalyScore = result.inferences['anomalyScore']

            # Compute the Anomaly Likelihood
            likelihood = anomalyLikelihood.anomalyProbability(
                inputData["value"], anomalyScore, inputData["dttm"])
            logLikelihood = anomalyLikelihood.computeLogLikelihood(likelihood)
            if likelihood > 0.9999:
                print "Anomaly detected:", inputData['dttm'], \
                    inputData['value'], likelihood

            # Write results to the output CSV file
            csvWriter.writerow([inputData["dttm"], inputData["value"],
                                anomalyScore, likelihood, logLikelihood])

            # Progress report
            if (i % 1000) == 0:
                print i, "records processed"

    print "Completed processing", i, "records at", datetime.datetime.now()
    print "Anomaly scores for", options.inputFile,
    print "have been written to", options.outputFile
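# Worked example of the resolution rule above (numbers are illustrative):
# the encoder's bucket width is the data range divided by the bucket count,
# floored at 0.001 to avoid a zero or negative resolution.
optMin, optMax, numBuckets = 0.0, 100.0, 130
resolution = max(0.001, (optMax - optMin) / numBuckets)
print(resolution)  # -> ~0.769, the scalar encoder's bucket width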
def __init__(self, config):
    # Instantiate NuPIC model
    model_params = base_model_params.MODEL_PARAMS
    model_params['modelParams']['sensorParams']['encoders']['value'][
        'resolution'] = config['resolution']
    self.model = ModelFactory.create(model_params)
    self.model.enableInference({'predictedField': 'value'})

    # The shifter is used to bring the predictions to the actual time frame
    self.shifter = InferenceShifter()

    # The anomaly likelihood object
    self.anomalyLikelihood = AnomalyLikelihood()

    # Set stream source
    self.stream = config['stream']

    # Setup class variables
    self.db = redis.Redis('localhost')
    self.seconds_per_request = config['seconds_per_request']
    self.webhook = config['webhook']
    self.anomaly_threshold = config['anomaly_threshold']
    self.likelihood_threshold = config['likelihood_threshold']
    self.domain = config['domain']
    self.alert = False  # Toggle when we get above threshold

    # Setup logging ("logger" was previously referenced before assignment)
    self.logger = logging.getLogger(__name__)
    handler = logging.handlers.RotatingFileHandler(
        os.environ['LOG_DIR'] + "/monitor_%s.log" % self.stream.name,
        maxBytes=1024 * 1024,
        backupCount=4,
    )
    handler.setFormatter(logging.Formatter(
        '[%(levelname)s/%(processName)s][%(asctime)s] %(name)s %(message)s'))
    handler.setLevel(logging.INFO)
    self.logger.addHandler(handler)
    self.logger.setLevel(logging.INFO)

    self.logger.info("=== Settings ===")
    self.logger.info("Webhook: %s", self.webhook)
    self.logger.info("Domain: %s", self.domain)
    self.logger.info("Seconds per request: %d", self.seconds_per_request)

    # Write metadata to Redis
    try:
        # Save in redis with key = 'results:monitor_id' and
        # value = 'time, status, actual, prediction, anomaly'
        self.db.set('name:%s' % self.stream.id, self.stream.name)
        self.db.set('value_label:%s' % self.stream.id,
                    self.stream.value_label)
        self.db.set('value_unit:%s' % self.stream.id, self.stream.value_unit)
    except Exception:
        self.logger.warn("Could not write results to redis.", exc_info=True)
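# Hypothetical config dict for the monitor above; the keys are taken from
# the attribute reads in __init__, but every value (and the stream object)
# is a placeholder, not from the original project.
config = {
    'resolution': 0.5,
    'stream': my_stream,  # object exposing .id/.name/.value_label/.value_unit
    'seconds_per_request': 60,
    'webhook': 'https://example.com/hook',
    'anomaly_threshold': 0.9,
    'likelihood_threshold': 0.99999,
    'domain': 'example.com',
}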
parser = argparse.ArgumentParser(description='Add to existing name')
parser.add_argument(
    '--algo',
    help='add to existing name, especially if I am testing some new feature.')
args = parser.parse_args()
algo = args.algo


def get_all_files_path(root):
    files = [
        val for sublist in
        [[os.path.join(i[0], j) for j in i[2]] for i in os.walk(root)]
        for val in sublist
    ]
    return files


files = get_all_files_path('results/' + algo)

for f in files:
    if '_score' not in f:
        print(f)
        df = pd.read_csv(f)
        a = []
        al = AnomalyLikelihood()
        for i in range(len(df)):
            a.append(al.anomalyProbability(df.value.values[i],
                                           df.anomaly_score.values[i],
                                           df.timestamp.values[i]))
        # Overwrite the raw scores with the computed likelihoods.
        df['anomaly_score'] = a
        df.to_csv(f, index=False)
    predictedSegmentDecrement=0.0004,  # punishment for SEGMENTS for incorrect predictions
    # From the NuPIC documentation: a good value for predictedSegmentDecrement
    # is just a bit larger than (the column-level sparsity *
    # permanenceIncrement). So, if column-level sparsity is 2% and
    # permanenceIncrement is 0.01, this parameter should be something like
    # 4% * 0.01 = 0.0004.
    seed=1960,
    maxSegmentPerCell=255,
    maxSynapsesPerSegment=255)

############## ANOMALY DETECTIONS #############
anom_score = np.zeros((N_DATA + 1,))
anom_logscore = np.zeros((N_DATA + 1,))

anomaly_score = Anomaly(slidingWindowSize=25)
anomaly_likelihood = AnomalyLikelihood(learningPeriod=500,
                                       historicWindowSize=213)

dd = 0
for i, linha in enumerate(teste):
    # Encode the scalar value and the timestamp, then concatenate the bits.
    scalar_encoder.encodeIntoArray(linha[1], bits_scalar)
    time_encoder.encodeIntoArray(linha[0], bits_time)
    encoder_output = np.concatenate((bits_time, bits_scalar))

    sdr_output = np.zeros(N_COLUMNS)
# FIFO
events = reversed(events)

if PREDICT:
    import PREDICTmodel_params as model_params
else:
    import model_params as model_params

if LOAD:
    model = ModelFactory.loadFromCheckpoint(MODELSTATE)
else:
    model = ModelFactory.create(model_params.MODEL_PARAMS)

if VISUALIZE:
    Patcher().patchCLAModel(model)

model.enableInference({"predictedField": "event"})
print "Model created!\n"

# Get the Model-Classes:
anomalyLikelihood = AnomalyLikelihood()
if PREDICT:
    from nupic.data.inference_shifter import InferenceShifter
    shifter = InferenceShifter()

if WINDOWSIZE is not None:
    AnomalyScores = deque(numpy.ones(WINDOWSIZE), maxlen=WINDOWSIZE)
else:
    AnomalyScores = deque()  # numpy.ones(len(events)), maxlen=len(events) ?
LikelihoodScores = deque()

r = redis.Redis(host=os.environ.get("REDIS_HOST", "127.0.0.1"),
                port=int(os.environ.get("REDIS_PORT", 6379)),
                db=int(os.environ.get("REDIS_DB", 0)))

# Feed data into the model:
def runAnomaly(options):
    # Define local params.
    inputArray = []              # holds all input data
    anomalyArray = []            # holds all output data
    inputThreshold = float(10)   # how many percent of initial samples to ignore
    anomCounter = 0              # counts number of anomalies

    # Interpolate the function.
    [timeDataFinal, yvalues] = interpolateFunction(inputFileNameInterpol,
                                                   inputFileNameLocal)

    # JSON handling.
    with open("model_params.json") as fp:
        modelParams = json.load(fp)

    # Update the resolution (the encoder's bucket width) for the encoder.
    sensorParams = modelParams['modelParams']['sensorParams']
    numBuckets = modelParams['modelParams']['sensorParams']['encoders'][
        'value'].pop('numBuckets')
    resolution = options.resolution
    if resolution is None:
        resolution = max(0.001, (options.max - options.min) / numBuckets)
    print "Using resolution value: {0}".format(resolution)
    sensorParams['encoders']['value']['resolution'] = resolution

    model = ModelFactory.create(modelParams)
    model.enableInference({'predictedField': 'value'})

    with open(options.inputFile) as fin:
        # Open files and set up headers.
        reader = csv.reader(fin)
        headers = reader.next()

        # The anomaly likelihood object
        anomalyLikelihood = AnomalyLikelihood()

        # Iterate through each record in the CSV
        print "Starting processing at", datetime.datetime.now()
        for i, record in enumerate(reader, start=1):
            # Convert input data to a dict so we can pass it into the model
            inputData = dict(zip(headers, record))
            inputData["value"] = float(inputData["value"])
            inputArray.append(inputData["value"])
            inputData["dttm"] = dateutil.parser.parse(inputData["dttm"])

            # Send it to the CLA and get back the raw anomaly score
            result = model.run(inputData)
            anomalyScore = result.inferences['anomalyScore']
            anomalyArray.append(anomalyScore)

            # Compute the anomaly likelihood (NuPIC call).
            likelihood = anomalyLikelihood.anomalyProbability(
                inputData["value"], anomalyScore, inputData["dttm"])

    # Plot the output.
    myPlotFunction(inputArray, anomalyArray, inputThreshold)

    # Write the output file.
    interpolBool = False
    writeFunction(outputFileName, timeDataFinal, anomalyArray, interpolBool)