def testModelParams(self):
  """
  Check that the params produced by
  getScalarMetricWithTimeOfDayAnomalyParams form a valid dict from which
  ModelFactory can instantiate an HTMPredictionModel.
  """
  anomalyParams = getScalarMetricWithTimeOfDayAnomalyParams(
    [0], minVal=23.42, maxVal=23.420001)
  modelConfig = anomalyParams['modelConfig']
  encoders = modelConfig['modelParams']['sensorParams']['encoders']

  builtModel = ModelFactory.create(modelConfig=modelConfig)
  self.assertIsInstance(builtModel, HTMPredictionModel,
                        "JSON returned cannot be used to create a model")

  # A time-of-day encoder entry must be present
  self.assertIsNotNone(encoders['c0_timeOfDay'])

  # The RDSE resolution must not drop below 0.001
  valueEncoder = encoders['c1']
  if valueEncoder['type'] == 'RandomDistributedScalarEncoder':
    self.assertGreaterEqual(valueEncoder['resolution'], 0.001,
                            "Resolution is too low")

  # Asking for the "tm_cpp" implementation must show up in tmParams
  tmCppParams = getScalarMetricWithTimeOfDayAnomalyParams(
    [0], tmImplementation="tm_cpp")
  temporalImp = (
    tmCppParams['modelConfig']['modelParams']['tmParams']['temporalImp'])
  self.assertEqual(temporalImp, "tm_cpp",
                   "Incorrect json for tm_cpp tmImplementation")

  # An empty tmImplementation value must be rejected with ValueError
  with self.assertRaises(ValueError):
    getScalarMetricWithTimeOfDayAnomalyParams([0], tmImplementation="")
def testModelParams(self):
  """
  Check that the params produced by
  getScalarMetricWithTimeOfDayAnomalyParams form a valid dict from which
  ModelFactory can instantiate a CLAModel.
  """
  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    [0], minVal=23.42, maxVal=23.420001)
  config = generated['modelConfig']
  encoderSpecs = config['modelParams']['sensorParams']['encoders']

  claModel = ModelFactory.create(modelConfig=config)
  self.assertIsInstance(claModel, CLAModel,
                        "JSON returned cannot be used to create a model")

  # A time-of-day encoder entry must be present
  self.assertIsNotNone(encoderSpecs['c0_timeOfDay'])

  # The RDSE resolution must not drop below 0.001
  scalarEncoder = encoderSpecs['c1']
  if scalarEncoder['type'] == 'RandomDistributedScalarEncoder':
    self.assertGreaterEqual(scalarEncoder['resolution'], 0.001,
                            "Resolution is too low")

  # Asking for the "tm_cpp" implementation must show up in tpParams
  cppGenerated = getScalarMetricWithTimeOfDayAnomalyParams(
    [0], tmImplementation="tm_cpp")
  temporalImp = (
    cppGenerated['modelConfig']['modelParams']['tpParams']['temporalImp'])
  self.assertEqual(temporalImp, "tm_cpp",
                   "Incorrect json for tm_cpp tmImplementation")

  # An empty tmImplementation value must be rejected with ValueError
  with self.assertRaises(ValueError):
    getScalarMetricWithTimeOfDayAnomalyParams([0], tmImplementation="")
def _createModel(cls, stats, replaceParams):
  """Build an OPF model, apply parameter overrides and switch it on.

  :param dict stats: Metric data stats per stats_schema.json in the
    unicorn_backend package; "min" and "max" are required, "minResolution"
    is optional.
  :param sequence replaceParams: (PATH, REPLACEMENT) pairs; each PATH is
    split on _REPLACE_PATH_SEPARATOR and applied to the generated params via
    _recurseDictAndReplace.
  :returns: OPF Model instance with learning and inference enabled
  """
  generatedParams = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=stats["min"],
    maxVal=stats["max"],
    minResolution=stats.get("minResolution"))

  # Apply the overrides before the model is created from the params
  for path, replacement in replaceParams:
    pathParts = path.split(_REPLACE_PATH_SEPARATOR)
    _recurseDictAndReplace(generatedParams, pathParts, replacement)

  opfModel = ModelFactory.create(modelConfig=generatedParams["modelConfig"])
  opfModel.enableLearning()
  opfModel.enableInference(generatedParams["inferenceArgs"])
  return opfModel
def generateSwarmParams(stats):
  """
  Produce the parameters needed to create a model.

  :param stats: dict with "min", "max" and optional "minResolution"; values
    must be integer, float or None.
  :returns: None when either "min" or "max" is None; otherwise a swarmParams
    object, extended with an "inputRecordSchema" entry, that is suitable for
    passing to startMonitoring and startModel
  """
  lowerBound = stats.get("min")
  upperBound = stats.get("max")
  resolution = stats.get("minResolution")

  # Without both bounds no params can be generated
  if any(bound is None for bound in (lowerBound, upperBound)):
    return None

  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=lowerBound,
    maxVal=upperBound,
    minResolution=resolution)

  # Schema of the input records: a timestamp field followed by a float value
  timestampField = fieldmeta.FieldMetaInfo(
    "c0", fieldmeta.FieldMetaType.datetime,
    fieldmeta.FieldMetaSpecial.timestamp)
  valueField = fieldmeta.FieldMetaInfo(
    "c1", fieldmeta.FieldMetaType.float, fieldmeta.FieldMetaSpecial.none)
  generated["inputRecordSchema"] = (timestampField, valueField)

  return generated
def __init__(self, predictStep, enablePredict, maxValue, minValue,
             minResolution):
  """
  Store the configuration, reset the per-record state and build the model.

  :param predictStep: value written to clParams["steps"] of the generated
    model params
  :param enablePredict: value written to modelParams["clEnable"] of the
    generated model params
  :param maxValue: maximum metric value passed to the params generator
  :param minValue: minimum metric value passed to the params generator
  :param minResolution: minimum resolution passed to the params generator;
    must be non-zero because it is used as a divisor
  """
  # Configuration
  self.predictStep = predictStep
  self.enablePredict = enablePredict

  # xrange() raises ValueError for a zero step, which happened whenever the
  # value range was narrower than minResolution; fall back to a step of 1.
  sampleStep = int((maxValue - minValue) / minResolution) or 1
  self.metricData = xrange(int(minValue), int(maxValue), sampleStep)

  self.maxValue = maxValue
  self.minValue = minValue
  self.minResolution = minResolution

  # Per-record state, filled in later
  self.timestamp = None
  self.actualValue = None
  self.predictValue = None
  self.anomalyScore = None
  self.modelResult = None
  self.output = None

  # Generate the model parameters
  self.parameters = getScalarMetricWithTimeOfDayAnomalyParams(
    self.metricData, self.minValue, self.maxValue, self.minResolution)

  modelParams = self.parameters["modelConfig"]["modelParams"]
  # Make sure the results contain predictions
  modelParams["clEnable"] = self.enablePredict
  # Configure how many steps ahead to predict
  modelParams["clParams"]["steps"] = self.predictStep

  # Create the model and enable inference
  self.model = ModelFactory.create(self.parameters["modelConfig"])
  self.model.enableInference(self.parameters["inferenceArgs"])
def initialize(self):
  """
  Create the OPF model and, when ``self.useLikelihood`` is set, the anomaly
  likelihood helper.
  """
  # Pad the input range by 20% of its span on each side
  padding = abs(self.inputMax - self.inputMin) * 0.2
  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=self.inputMin - padding,
    maxVal=self.inputMax + padding,
    minResolution=0.001,
    tmImplementation="cpp")
  modelParams = generated["modelConfig"]

  encoderParams = modelParams["modelParams"]["sensorParams"]["encoders"]
  self._setupEncoderParams(encoderParams)

  self.model = ModelFactory.create(modelParams)
  self.model.enableInference({"predictedField": "value"})

  if not self.useLikelihood:
    return

  # Half of the probationary period (rounded down) is used for learning and
  # the remainder for estimation
  learningPeriod = math.floor(self.probationaryPeriod / 2.0)
  estimationSamples = self.probationaryPeriod - learningPeriod
  self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
    claLearningPeriod=learningPeriod,
    estimationSamples=estimationSamples,
    reestimationPeriod=100)
def _getModelParams(useTimeOfDay, useDayOfWeek, values):
  """
  Return a JSON-style dict describing the model configuration.

  :param bool useTimeOfDay: whether to use the timeOfDay encoder
  :param bool useDayOfWeek: whether to use the dayOfWeek encoder
  :param values: numpy array of data values, passed to the params generator
    as metric data
  """
  generated = getScalarMetricWithTimeOfDayAnomalyParams(metricData=values)
  encoders = generated['modelConfig']['modelParams']['sensorParams']['encoders']

  # Each date encoder is either configured explicitly or disabled with None
  encoders['c0_timeOfDay'] = (
    dict(fieldname='c0', name='c0', type='DateEncoder', timeOfDay=(21, 9))
    if useTimeOfDay else None)
  encoders['c0_dayOfWeek'] = (
    dict(fieldname='c0', name='c0', type='DateEncoder', dayOfWeek=(21, 3))
    if useDayOfWeek else None)

  generated["timestampFieldName"] = "c0"
  generated["valueFieldName"] = "c1"
  return generated
def getParams(columnNb, min, max):
  """
  Pretty-print the anomaly model params generated for the given value range.

  :param columnNb: not used by this function
  :param min: minimum value; converted with float()
  :param max: maximum value; converted with float()
  """
  lowerBound = float(min)
  upperBound = float(max)
  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0], minVal=lowerBound, maxVal=upperBound)
  pprint.pprint(generated)
def initialize(self):
  """Create the OPF model and the anomaly likelihood helper."""
  # Pad the input range by 20% of its span on each side
  padding = abs(self.inputMax - self.inputMin) * 0.2
  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=self.inputMin - padding,
    maxVal=self.inputMax + padding,
    minResolution=0.001,
    tmImplementation="tm_cpp")
  modelParams = generated["modelConfig"]

  encoderParams = modelParams["modelParams"]["sensorParams"]["encoders"]
  self._setupEncoderParams(encoderParams)

  self.model = ModelFactory.create(modelParams)
  self.model.enableInference({"predictedField": "value"})

  # Half of the probationary period (rounded down, as an int) is used for
  # learning and the remainder for estimation
  learningPeriod = int(math.floor(self.probationaryPeriod / 2.0))
  estimationSamples = self.probationaryPeriod - learningPeriod
  self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
    learningPeriod=learningPeriod,
    estimationSamples=estimationSamples,
    reestimationPeriod=100)
def setUp(self):
  """Generate model params for the range 0..100 and record field names."""
  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=0,
    maxVal=100,
    minResolution=None)

  self.modelConfig = generated["modelConfig"]
  self.inferenceArgs = generated["inferenceArgs"]

  # Names of the timestamp and value fields
  self.timestampFieldName = "c0"
  self.valueFieldName = "c1"
def createModel(metric):
  """
  Create an OPF model for ``metric`` with inference enabled on field "c1".

  The value range is read from the "min" and "max" entries of
  ``metrics[metric]``.

  :param metric: key into the ``metrics`` mapping
  :return: the created model
  """
  lowerBound = metrics[metric]["min"]
  upperBound = metrics[metric]["max"]

  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],  # just dummy data unless real data is sent in here
    minVal=lowerBound,
    maxVal=upperBound,
    minResolution=0.001,  # may need tuning
    tmImplementation="cpp")

  opfModel = ModelFactory.create(generated["modelConfig"])
  opfModel.enableInference({"predictedField": "c1"})
  return opfModel
def get_params(min_val, max_val):
  """
  Generate the model parameters, save them to 'parameters.json' and return
  them.

  :min_val: the 'expected' minimum value of the scalar data
  :max_val: the 'expected' max value of the scalar data
  :returns: dict containing the model parameters
  """
  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    tmImplementation="cpp",
    minVal=min_val,
    maxVal=max_val)

  # Keep a JSON copy of the params in the current working directory
  with open('parameters.json', 'w') as paramsFile:
    json.dump(generated, paramsFile, indent=4)

  return generated
def _getModelParams(useTimeOfDay, useDayOfWeek, values):
  """
  Return a JSON-style dict describing the model configuration.

  @param useTimeOfDay (bool) whether to use the timeOfDay encoder
  @param useDayOfWeek (bool) whether to use the dayOfWeek encoder
  @param values (numpy array) data values, used to compute min/max values

  @return (dict) A dictionary of model parameters
  """
  # Get params in the same fashion as NAB: pad the observed range by 20% of
  # its span on each side and set the RDSE resolution
  observedMin = numpy.min(values)
  observedMax = numpy.max(values)
  padding = abs(observedMax - observedMin) * 0.2

  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=observedMin - padding,
    maxVal=observedMax + padding,
    minResolution=0.001)
  encoders = generated["modelConfig"]["modelParams"]["sensorParams"]["encoders"]

  # Each date encoder is either configured explicitly or disabled with None
  encoders["c0_timeOfDay"] = (
    dict(fieldname="c0", name="c0", type="DateEncoder",
         timeOfDay=(21, 9.49122334747737))
    if useTimeOfDay else None)
  encoders["c0_dayOfWeek"] = (
    dict(fieldname="c0", name="c0", type="DateEncoder", dayOfWeek=(21, 3))
    if useDayOfWeek else None)

  generated["timestampFieldName"] = "c0"
  generated["valueFieldName"] = "c1"
  return generated
def runAnomaly():
  """
  Run the anomaly model over the rows of _INPUT_DATA_FILE.

  Every row is fed to the model and to an AnomalyLikelihood instance. When
  the row whose 1-based index equals the module-level ``lines`` value is
  reached, its timestamp, value, anomaly score, anomaly likelihood and a 0/1
  anomaly flag are appended to _OUTPUT_PATH and the flag is returned.

  :returns: 1 if the likelihood of row ``lines`` exceeds _ANOMALY_THRESHOLD,
    0 otherwise; None if the input has fewer than ``lines`` data rows
  """
  params = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],  # just dummy data unless real data is sent in here
    minVal=38,
    maxVal=55,
    minResolution=0.001,  # may need tuning
    tmImplementation="cpp")
  model = createModel(params["modelConfig"])

  # Both files are managed by the with-statement so that the output file is
  # flushed and closed on every exit path, including the early return below.
  with open(_INPUT_DATA_FILE) as fin, open(_OUTPUT_PATH, "a") as fout:
    reader = csv.reader(fin)
    csvWriter = csv.writer(fout)

    # The first row holds the field names; the next two rows are not data
    headers = next(reader)
    next(reader)
    next(reader)

    anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
      historicWindowSize=1152)

    for i, record in enumerate(reader, start=1):
      modelInput = dict(zip(headers, record))
      modelInput["c1"] = float(modelInput["c1"])
      value = modelInput["c1"]
      modelInput["c0"] = datetime.datetime.strptime(
        modelInput["c0"], "%Y-%m-%d %H:%M:%S")
      timestamp = modelInput["c0"]

      result = model.run(modelInput)
      anomalyScore = result.inferences['anomalyScore']
      anomalyLikelyhood2 = anomalyLikelihood.anomalyProbability(
        value, anomalyScore, timestamp)

      # Only the row numbered `lines` is reported and written out
      if i == lines:
        if anomalyLikelyhood2 > _ANOMALY_THRESHOLD:
          _LOGGER.info(
            "Anomaly detected at [%s]. Anomaly score: %f.",
            result.rawInput["c0"], anomalyScore)
          anomaly = 1
        else:
          anomaly = 0
        csvWriter.writerow(
          [timestamp, value, anomalyScore, anomalyLikelyhood2, anomaly])
        return anomaly

  print("Anomaly scores have been written to " + _OUTPUT_PATH)
def create_model(self):
  """
  Create the OPF model for ``self.pred_field`` and enable inference on it.

  When ``self.bestParams`` is falsy, the model params are obtained through
  ``self.get_model_params()``. Otherwise they are generated with
  getScalarMetricWithTimeOfDayAnomalyParams, their encoders are replaced by
  the result of ``Modelrunner.setEncoderParams`` and the params are dumped
  as JSON into the "model_params/" directory under ``self.output_fpath``.
  """
  print(os.path.abspath(self.output_fpath))

  if self.bestParams:
    generated = getScalarMetricWithTimeOfDayAnomalyParams(
      metricData=[0],
      tmImplementation="cpp",
      minResolution=self.resolution,
      minVal=self.minVal,
      maxVal=self.maxVal)
    self.model_params = generated["modelConfig"]

    sensor_params = self.model_params["modelParams"]["sensorParams"]
    sensor_params["encoders"] = Modelrunner.setEncoderParams(
      sensor_params["encoders"], self.pred_field)

    params_dir = self.output_fpath + "model_params/"
    if not os.path.exists(os.path.dirname(params_dir)):
      os.makedirs(params_dir)

    # Create (or truncate) the __init__.py file of the params directory
    with open(os.path.join(params_dir, "__init__.py"), "w"):
      pass

    params_file = params_dir + self.pred_field + "_model_params.py"
    with open(params_file, 'w') as fp:
      json.dump(self.model_params, fp, indent=4)
  else:
    joined_path = os.path.join(self.output_fpath, self.pred_field)
    self.model_fpath = joined_path.replace("/", ".")
    self.model_params_name = 'model_params' + self.suffix
    print("Creating model from %s..." % self.model_params_name)
    self.get_model_params()

  self.model = ModelFactory.create(self.model_params)
  self.model.enableInference({"predictedField": self.pred_field})
def initialize(self):
  """
  Generate and store the model params and create the anomaly likelihood
  helper. No model is created here.
  """
  # Pad the input range by 20% of its span on each side
  padding = abs(self.inputMax - self.inputMin) * 0.2
  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=self.inputMin - padding,
    maxVal=self.inputMax + padding,
    minResolution=0.001,
    tmImplementation="tm_cpp")
  self.modelParams = generated["modelConfig"]

  encoderParams = self.modelParams["modelParams"]["sensorParams"]["encoders"]
  self._setupEncoderParams(encoderParams)

  # Half of the probationary period (rounded down) is used for learning and
  # the remainder for estimation
  learningPeriod = math.floor(self.probationaryPeriod / 2.0)
  estimationSamples = self.probationaryPeriod - learningPeriod
  self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
    learningPeriod=learningPeriod,
    estimationSamples=estimationSamples,
    reestimationPeriod=100)
def createModel(InputName):
  """
  Create an OPF model from the parameters imported for ``InputName``.

  The sequence returned by ``getNewParams(InputName)`` supplies the metric
  data (index 1), the minimum value (index 2) and the maximum value
  (index 3). The classifier is switched on ("clEnable") before the model is
  created; learning and inference are enabled afterwards.

  :param InputName: argument forwarded to getNewParams
  :return: OPF Model object
  """
  # Get the new parameters from the csv file
  importedParams = getNewParams(InputName)

  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=importedParams[1],
    tmImplementation="cpp",
    minVal=importedParams[2],
    maxVal=importedParams[3])

  modelConfig = generated['modelConfig']
  modelConfig['modelParams']['clEnable'] = True

  opfModel = ModelFactory.create(modelConfig=modelConfig)
  opfModel.enableLearning()
  opfModel.enableInference(generated["inferenceArgs"])
  return opfModel
def _createModel(cls, stats):
  """Build an OPF model and enable learning and inference on it.

  :param dict stats: Metric data stats per stats_schema.json in the
    unicorn_backend package; "min" and "max" are required, "minResolution"
    is optional.
  :returns: OPF Model instance
  """
  generatedParams = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=stats["min"],
    maxVal=stats["max"],
    minResolution=stats.get("minResolution"))

  opfModel = ModelFactory.create(modelConfig=generatedParams["modelConfig"])
  opfModel.enableLearning()
  opfModel.enableInference(generatedParams["inferenceArgs"])
  return opfModel
def initialize(self, lower_data_limit=-1e9, upper_data_limit=1e9,
               probation_number=750, spatial_tolerance=0.05):
  """
  Configure the detector, create the OPF model and, when
  ``self.useLikelihood`` is set, the anomaly likelihood helper.

  Any data outside the range [lower_data_limit, upper_data_limit] will be
  regarded as an anomaly directly. The algorithm treats the first
  probation_number inputs as a reference for calculating the likelihood, so
  no anomaly is expected among them; the longer this period, the better.

  :param lower_data_limit: stored as ``self.input_min``
  :param upper_data_limit: stored as ``self.input_max``
  :param probation_number: stored as ``self.probationary_period``
  :param spatial_tolerance: stored as ``self.spatial_tolerance``
  """
  self.probationary_period = probation_number
  self.input_min = lower_data_limit
  self.input_max = upper_data_limit

  # Fraction outside of the range of values seen so far that will be
  # considered a spatial anomaly regardless of the anomaly likelihood
  # calculation. This accounts for the human labelling bias for spatial
  # values larger than what has been seen so far.
  self.spatial_tolerance = spatial_tolerance

  # Pad the input range by 20% of its span on each side
  padding = abs(self.input_max - self.input_min) * 0.2
  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=self.input_min - padding,
    maxVal=self.input_max + padding,
    minResolution=0.001,
    tmImplementation="cpp")
  model_params = generated["modelConfig"]

  encoder_params = model_params["modelParams"]["sensorParams"]["encoders"]
  self._setupEncoderParams(encoder_params)

  self.model = ModelFactory.create(model_params)
  self.model.enableInference({"predictedField": "value"})

  if not self.useLikelihood:
    return

  # Half of the probationary period (rounded down, as an int) is used for
  # learning and the remainder for estimation
  learning_period = int(math.floor(self.probationary_period / 2.0))
  estimation_samples = self.probationary_period - learning_period
  self.anomaly_likelihood = anomaly_likelihood.AnomalyLikelihood(
    learningPeriod=learning_period,
    estimationSamples=estimation_samples,
    reestimationPeriod=100)
def __init__(self, fields, predictStep, enablePredict, maxValue, minValue,
             minResolution):
  """
  Store the configuration and build one HTM model per field.

  ``maxValue``, ``minValue`` and ``minResolution`` are indexed by the
  position of the corresponding entry in ``fields``.

  :param fields: sequence of field names; each gets its own model, stored in
    ``self.models`` under the field name
  :param predictStep: value written to clParams["steps"] of every model's
    params
  :param enablePredict: value written to modelParams["clEnable"] of every
    model's params
  :param maxValue: per-field maximum values
  :param minValue: per-field minimum values
  :param minResolution: per-field minimum resolutions; each must be non-zero
    because it is used as a divisor
  """
  # Configuration
  self.fields = fields
  self.predictStep = predictStep
  self.enablePredict = enablePredict

  # Metric data for the HTM parameters, one range per field
  self.metricData = {}
  for i, field in enumerate(self.fields):
    # xrange() raises ValueError for a zero step, which happened whenever
    # the value range was narrower than the resolution; fall back to 1.
    sampleStep = int((maxValue[i] - minValue[i]) / minResolution[i]) or 1
    self.metricData[field] = xrange(
      int(minValue[i]), int(maxValue[i]), sampleStep)

  self.maxValue = maxValue
  self.minValue = minValue
  self.minResolution = minResolution

  # Per-record state, filled in later
  self.timestamp = None
  self.actualValue = None
  self.predictValue = None
  self.anomalyScore = None
  self.parameters = None
  self.model = None
  self.models = {}
  self.modelResult = None
  self.output = {}

  # One HTM model per field. self.parameters and self.model are left
  # pointing at the last field's params and model, as before.
  for i, field in enumerate(self.fields):
    self.parameters = getScalarMetricWithTimeOfDayAnomalyParams(
      self.metricData[field], self.minValue[i], self.maxValue[i],
      self.minResolution[i])

    modelParams = self.parameters["modelConfig"]["modelParams"]
    # Make sure the results contain predictions
    modelParams["clEnable"] = self.enablePredict
    # Configure how many steps ahead to predict
    modelParams["clParams"]["steps"] = self.predictStep

    # Create the model and enable inference
    self.model = ModelFactory.create(self.parameters["modelConfig"])
    self.model.enableInference(self.parameters["inferenceArgs"])
    self.models[field] = self.model
def initialize(self, inputMin, inputMax):
  """
  Record the input range, create the OPF model and the anomaly likelihood
  helper.

  :param inputMin: stored as ``self.inputMin``
  :param inputMax: stored as ``self.inputMax``
  """
  self.inputMin = inputMin
  self.inputMax = inputMax

  # Pad the input range by 20% of its span on each side
  padding = abs(self.inputMax - self.inputMin) * 0.2
  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=self.inputMin - padding,
    maxVal=self.inputMax + padding,
    minResolution=0.001,
    tmImplementation="cpp")
  modelParams = generated["modelConfig"]

  encoderParams = modelParams["modelParams"]["sensorParams"]["encoders"]
  self._setupEncoderParams(encoderParams)

  self.model = ModelFactory.create(modelParams)
  self.model.enableInference({"predictedField": "value"})

  # Anomaly likelihood helper with its default settings
  self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood()
def initialize(self):
  """
  Generate and store the model params and create the anomaly likelihood
  helper. No model is created here.
  """
  # Pad the input range by 20% of its span on each side
  padding = abs(self.inputMax - self.inputMin) * 0.2
  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=self.inputMin - padding,
    maxVal=self.inputMax + padding,
    minResolution=0.001,
    tmImplementation="tm_cpp")
  self.modelParams = generated["modelConfig"]

  encoderParams = self.modelParams["modelParams"]["sensorParams"]["encoders"]
  self._setupEncoderParams(encoderParams)

  # Half of the probationary period (rounded down) is used for learning and
  # the remainder for estimation
  learningPeriod = math.floor(self.probationaryPeriod / 2.0)
  estimationSamples = self.probationaryPeriod - learningPeriod
  self.anomalyLikelihood = anomaly_likelihood.AnomalyLikelihood(
    claLearningPeriod=learningPeriod,
    estimationSamples=estimationSamples,
    reestimationPeriod=100)
def generateSwarmParams(stats, classifierEnabled=False):
  """
  Produce the parameters needed to create a model.

  :param stats: dict with "min", "max" and optional "minResolution"; values
    must be integer, float or None.
  :param classifierEnabled: A Boolean value to be given to the 'clEnable'
    property of 'modelParams'. As the classifier generates multi-step best
    predictions, setting this value to True will allow multi-step best
    predictions to be populated in the metric_data table for the associated
    metric of the model.
  :returns: None when either "min" or "max" is None; otherwise a swarmParams
    object, extended with an "inputRecordSchema" entry, that is suitable for
    passing to startMonitoring and startModel
  """
  lowerBound = stats.get("min")
  upperBound = stats.get("max")
  resolution = stats.get("minResolution")

  # Without both bounds no params can be generated
  if any(bound is None for bound in (lowerBound, upperBound)):
    return None

  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0],
    minVal=lowerBound,
    maxVal=upperBound,
    minResolution=resolution)

  # Classifier must be enabled to obtain predicted values
  generated["modelConfig"]["modelParams"]["clEnable"] = classifierEnabled

  # Schema of the input records: a timestamp field followed by a float value
  timestampField = fieldmeta.FieldMetaInfo(
    "c0", fieldmeta.FieldMetaType.datetime,
    fieldmeta.FieldMetaSpecial.timestamp)
  valueField = fieldmeta.FieldMetaInfo(
    "c1", fieldmeta.FieldMetaType.float, fieldmeta.FieldMetaSpecial.none)
  generated["inputRecordSchema"] = (timestampField, valueField)

  return generated
def testModelParams(self):
  """
  Check that the params produced by
  getScalarMetricWithTimeOfDayAnomalyParams form a valid dict from which
  ModelFactory can instantiate a CLAModel.
  """
  generated = getScalarMetricWithTimeOfDayAnomalyParams(
    [0], minVal=23.42, maxVal=23.420001)
  config = generated['modelConfig']
  encoderSpecs = config['modelParams']['sensorParams']['encoders']

  claModel = ModelFactory.create(modelConfig=config)
  self.assertIsInstance(claModel, CLAModel,
                        "JSON returned cannot be used to create a model")

  # A time-of-day encoder entry must be present
  self.assertIsNotNone(encoderSpecs['c0_timeOfDay'])

  # The RDSE resolution must not drop below 0.001
  scalarEncoder = encoderSpecs['c1']
  if scalarEncoder['type'] == 'RandomDistributedScalarEncoder':
    self.assertGreaterEqual(scalarEncoder['resolution'], 0.001,
                            "Resolution is too low")
def _getModelParams(useTimeOfDay, useDayOfWeek, values):
  """
  Return a JSON-style dict describing the model configuration.

  @param useTimeOfDay (bool) whether to use the timeOfDay encoder
  @param useDayOfWeek (bool) whether to use the dayOfWeek encoder
  @param values (numpy array) data values, passed to the params generator
                as metric data

  @return (dict) A dictionary of model parameters
  """
  generated = getScalarMetricWithTimeOfDayAnomalyParams(metricData=values)
  encoders = generated["modelConfig"]["modelParams"]["sensorParams"]["encoders"]

  # Each date encoder is either configured explicitly or disabled with None
  encoders["c0_timeOfDay"] = (
    dict(fieldname="c0", name="c0", type="DateEncoder", timeOfDay=(21, 9))
    if useTimeOfDay else None)
  encoders["c0_dayOfWeek"] = (
    dict(fieldname="c0", name="c0", type="DateEncoder", dayOfWeek=(21, 3))
    if useDayOfWeek else None)

  generated["timestampFieldName"] = "c0"
  generated["valueFieldName"] = "c1"
  return generated
def testModelParams(self):
  """
  Verify that the dict returned by
  getScalarMetricWithTimeOfDayAnomalyParams is valid input for
  ModelFactory and yields a CLAModel.
  """
  anomalyParams = getScalarMetricWithTimeOfDayAnomalyParams(
    [0], minVal=23.42, maxVal=23.420001)
  modelConfig = anomalyParams['modelConfig']
  encoders = modelConfig['modelParams']['sensorParams']['encoders']

  createdModel = ModelFactory.create(modelConfig=modelConfig)
  self.assertIsInstance(createdModel, CLAModel,
                        "JSON returned cannot be used to create a model")

  # The time of day field has to be configured
  self.assertIsNotNone(encoders['c0_timeOfDay'])

  # The RDSE resolution has to stay at or above 0.001
  valueEncoder = encoders['c1']
  if valueEncoder['type'] == 'RandomDistributedScalarEncoder':
    self.assertGreaterEqual(valueEncoder['resolution'], 0.001,
                            "Resolution is too low")
def createModel(modelParams):
  """
  Create an OPF model from a model params dictionary and enable inference
  for the "c1" field.

  :param modelParams: Model params dict
  :return: OPF Model object
  """
  opfModel = ModelFactory.create(modelParams)
  opfModel.enableInference({"predictedField": "c1"})
  return opfModel


params = getScalarMetricWithTimeOfDayAnomalyParams(
  metricData=[0],  # just dummy data unless real data is sent in here
  minVal=0,
  maxVal=100,
  minResolution=0.001,  # may need tuning
  tmImplementation="cpp")

# The params are just a dict, so they can be printed out and changed to suit
# your needs. Here they are only printed so they can be inspected:
pprint(params)

# Now use these params to create a model
model = createModel(params["modelConfig"])

# # Open the file to loop over each row
# with open ("/home/marta/PycharmProjects/CYBEROPS/MAQUINAS/Labeled_data/ec2_disk_write_bytes_c0d644_labeled (copia).csv") as fileIn:
#   reader = csv.reader(fileIn)
#   # The first three rows are not data, but we'll need the field names when
#   # passing data into the model.
def testCloneModel(self):
  """
  End-to-end test of cloning a model through the model swapper.

  Defines a model, clones it, submits two input rows to the clone and then
  deletes the clone, collecting one result batch after each step. The four
  batches are verified afterwards, and finally the model scheduler
  subprocess is terminated and expected to exit with return code 0.
  """
  modelSchedulerSubprocess = self._startModelSchedulerSubprocess()
  # Kill the subprocess on cleanup if it has not exited by then
  self.addCleanup(lambda: modelSchedulerSubprocess.kill()
                  if modelSchedulerSubprocess.returncode is None
                  else None)

  modelID = "abc"
  destModelID = "def"

  # Result batches in the order in which they are consumed
  resultBatches = []

  with ModelSwapperInterface() as swapperAPI:
    args = getScalarMetricWithTimeOfDayAnomalyParams(metricData=[0],
                                                     minVal=0,
                                                     maxVal=1000)

    # Submit requests including a model creation command and two data rows.
    args["inputRecordSchema"] = (
      FieldMetaInfo("c0", FieldMetaType.datetime,
                    FieldMetaSpecial.timestamp),
      FieldMetaInfo("c1", FieldMetaType.float,
                    FieldMetaSpecial.none),
    )

    # Define the model
    _LOGGER.info("Defining the model")
    swapperAPI.defineModel(modelID=modelID,
                           args=args,
                           commandID="defineModelCmd1")

    resultBatches.extend(self._consumeResults(1, timeout=20))
    self.assertEqual(len(resultBatches), 1)

    # Clone the just-defined model
    _LOGGER.info("Cloning model")
    swapperAPI.cloneModel(modelID, destModelID,
                          commandID="cloneModelCmd1")

    resultBatches.extend(self._consumeResults(1, timeout=20))
    self.assertEqual(len(resultBatches), 2)

    # Send input rows to the clone
    inputRows = [
      ModelInputRow(rowID="rowfoo",
                    data=[datetime.datetime(2013, 5, 23, 8, 13, 00), 5.3]),
      ModelInputRow(rowID="rowbar",
                    data=[datetime.datetime(2013, 5, 23, 8, 13, 15), 2.4]),
    ]
    _LOGGER.info("Submitting batch of %d input rows...", len(inputRows))
    swapperAPI.submitRequests(modelID=destModelID, requests=inputRows)

    _LOGGER.info("These models have pending input: %s",
                 swapperAPI.getModelsWithInputPending())

    resultBatches.extend(self._consumeResults(1, timeout=20))
    self.assertEqual(len(resultBatches), 3)

    with MessageBusConnector() as bus:
      # The results message queue should be empty now
      self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

    # Delete the model
    _LOGGER.info("Deleting the model")
    swapperAPI.deleteModel(modelID=destModelID, commandID="deleteModelCmd1")

    _LOGGER.info("Waiting for model deletion result")
    resultBatches.extend(self._consumeResults(1, timeout=20))

    self.assertEqual(len(resultBatches), 4)

    with MessageBusConnector() as bus:
      # The results message queue should be empty now
      self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

      # The model input queue should be deleted now
      self.assertFalse(
        bus.isMessageQeueuePresent(
          swapperAPI._getModelInputQName(modelID=destModelID)))

  # Verify results

  # First result batch should be the defineModel result
  batch = resultBatches[0]
  self.assertEqual(batch.modelID, modelID)
  self.assertEqual(len(batch.objects), 1)

  result = batch.objects[0]
  self.assertIsInstance(result, ModelCommandResult)
  self.assertEqual(result.method, "defineModel")
  self.assertEqual(result.status, htmengineerrno.SUCCESS)
  self.assertEqual(result.commandID, "defineModelCmd1")

  # The second result batch should be for the cloneModel result; it is
  # expected to carry the source model's ID
  batch = resultBatches[1]
  self.assertEqual(batch.modelID, modelID)
  self.assertEqual(len(batch.objects), 1)

  result = batch.objects[0]
  self.assertIsInstance(result, ModelCommandResult)
  self.assertEqual(result.method, "cloneModel")
  self.assertEqual(result.status, htmengineerrno.SUCCESS)
  self.assertEqual(result.commandID, "cloneModelCmd1")

  # The third batch should be for the two input rows
  batch = resultBatches[2]
  self.assertEqual(batch.modelID, destModelID)
  self.assertEqual(len(batch.objects), len(inputRows))

  # Results are matched to the input rows by position
  for inputRow, result in zip(inputRows, batch.objects):
    self.assertIsInstance(result, ModelInferenceResult)
    self.assertEqual(result.status, htmengineerrno.SUCCESS)
    self.assertEqual(result.rowID, inputRow.rowID)
    self.assertIsInstance(result.anomalyScore, float)

  # The fourth batch should be for the "deleteModel"
  batch = resultBatches[3]
  self.assertEqual(batch.modelID, destModelID)
  self.assertEqual(len(batch.objects), 1)

  result = batch.objects[0]
  self.assertIsInstance(result, ModelCommandResult)
  self.assertEqual(result.method, "deleteModel")
  self.assertEqual(result.status, htmengineerrno.SUCCESS)
  self.assertEqual(result.commandID, "deleteModelCmd1")

  # Signal Model Scheduler Service subprocess to shut down and wait for it
  # Filled in by the waiter thread: "returnCode" on success,
  # "exceptionInfo" when waiting fails
  waitResult = dict()
  def runWaiterThread():
    try:
      waitResult["returnCode"] = modelSchedulerSubprocess.wait()
    except:
      # Log and record the traceback, then re-raise the exception
      _LOGGER.exception("Waiting for modelSchedulerSubprocess failed")
      waitResult["exceptionInfo"] = traceback.format_exc()
      raise
    return

  modelSchedulerSubprocess.terminate()
  # Wait in a daemon thread so that the join below can be bounded by a
  # timeout
  waiterThread = threading.Thread(target=runWaiterThread)
  waiterThread.setDaemon(True)
  waiterThread.start()
  waiterThread.join(timeout=30)
  # The thread must have finished within the timeout
  self.assertFalse(waiterThread.isAlive())

  self.assertEqual(waitResult["returnCode"], 0, msg=repr(waitResult))
try: import capnp import serializable_test_capnp except ImportError: # Ignore for platforms in which capnp is not available, e.g. windows capnp = None import nupic from nupic.frameworks.opf.common_models.cluster_params import ( getScalarMetricWithTimeOfDayAnomalyParams) from nupic.serializable import Serializable MODEL_PARAMS = getScalarMetricWithTimeOfDayAnomalyParams([0], minVal=23.42, maxVal=23.420001) SERIALIZABLE_SUBCLASSES = { "MovingAverage": { "params": {"windowSize": 1} }, "AnomalyLikelihood": {}, "BacktrackingTM": {}, "Connections": {"params": {"numCells": 1}}, "TemporalMemory": {}, "KNNClassifier": {}, "SDRClassifier": {}, "SpatialPooler": { "params": {"inputDimensions": (2, 2), "columnDimensions": (4, 4)} },
try: import capnp import serializable_test_capnp except ImportError: # Ignore for platforms in which capnp is not available, e.g. windows capnp = None import nupic from nupic.frameworks.opf.common_models.cluster_params import ( getScalarMetricWithTimeOfDayAnomalyParams) from nupic.serializable import Serializable MODEL_PARAMS = getScalarMetricWithTimeOfDayAnomalyParams([0], minVal=23.42, maxVal=23.420001) SERIALIZABLE_SUBCLASSES = { "MovingAverage": { "params": { "windowSize": 1 } }, "AnomalyLikelihood": {}, "BacktrackingTM": {}, "Connections": { "params": { "numCells": 1 } },
def testModelSwapper(self):
  """Simple end-to-end test of the model swapper system.

  Starts the Model Scheduler service subprocess, then uses
  ``ModelSwapperInterface`` to define a model twice under the same ID, submit
  one batch of two input rows, and delete the model (a second delete is
  issued afterwards to make sure it raises no exception). Four result batches
  are collected and verified: two ``defineModel`` results, one inference
  batch, and one ``deleteModel`` result. Finally the scheduler subprocess is
  terminated and must exit with return code 0 within 30 seconds.
  """
  modelSchedulerSubprocess = self._startModelSchedulerSubprocess()

  def killSchedulerIfRunning():
    # Only kill the scheduler when no return code has been recorded for it
    if modelSchedulerSubprocess.returncode is None:
      modelSchedulerSubprocess.kill()

  self.addCleanup(killSchedulerIfRunning)

  modelID = "foobar"
  resultBatches = []

  def assertCommandResult(batch, method, commandID):
    """Check that `batch` holds exactly one successful command result."""
    self.assertEqual(batch.modelID, modelID)
    self.assertEqual(len(batch.objects), 1)
    result = batch.objects[0]
    self.assertIsInstance(result, ModelCommandResult)
    self.assertEqual(result.method, method)
    self.assertEqual(result.status, htmengineerrno.SUCCESS)
    self.assertEqual(result.commandID, commandID)

  with ModelSwapperInterface() as swapperAPI:
    args = getScalarMetricWithTimeOfDayAnomalyParams(metricData=[0],
                                                     minVal=0,
                                                     maxVal=1000)

    # Submit requests including a model creation command and two data rows.
    args["inputRecordSchema"] = (
      FieldMetaInfo("c0", FieldMetaType.datetime, FieldMetaSpecial.timestamp),
      FieldMetaInfo("c1", FieldMetaType.float, FieldMetaSpecial.none),
    )

    # Define the model
    _LOGGER.info("Defining the model")
    swapperAPI.defineModel(modelID=modelID, args=args,
                           commandID="defineModelCmd1")

    # Attempt to define the same model again
    _LOGGER.info("Defining the model again")
    swapperAPI.defineModel(modelID=modelID, args=args,
                           commandID="defineModelCmd2")

    # Send input rows to the model
    inputRows = [
      ModelInputRow(rowID="rowfoo",
                    data=[datetime.datetime(2013, 5, 23, 8, 13, 00), 5.3]),
      ModelInputRow(rowID="rowbar",
                    data=[datetime.datetime(2013, 5, 23, 8, 13, 15), 2.4]),
    ]

    _LOGGER.info("Submitting batch of %d input rows...", len(inputRows))
    swapperAPI.submitRequests(modelID=modelID, requests=inputRows)

    _LOGGER.info("These models have pending input: %s",
                 swapperAPI.getModelsWithInputPending())

    # Retrieve all results.
    # NOTE: We collect results via background thread to avoid
    # deadlocking the test runner in the event consuming blocks unexpectedly
    _LOGGER.info("Reading all batches of results...")

    numBatchesExpected = 3
    resultBatches.extend(
      self._consumeResults(numBatchesExpected, timeout=20))

    self.assertEqual(len(resultBatches), numBatchesExpected)

    with MessageBusConnector() as bus:
      # The results message queue should be empty now
      self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

    # Delete the model
    _LOGGER.info("Deleting the model")
    swapperAPI.deleteModel(modelID=modelID, commandID="deleteModelCmd1")

    _LOGGER.info("Waiting for model deletion result")
    resultBatches.extend(self._consumeResults(1, timeout=20))

    self.assertEqual(len(resultBatches), 4)

    with MessageBusConnector() as bus:
      # The results message queue should be empty now
      self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

      # The model input queue should be deleted now
      self.assertFalse(
        bus.isMessageQeueuePresent(
          swapperAPI._getModelInputQName(modelID=modelID)))

    # Try deleting the model again, to make sure there are no exceptions
    _LOGGER.info("Attempting to delete the model again")
    swapperAPI.deleteModel(modelID=modelID, commandID="deleteModelCmd1")

  # Verify results

  # First result batch should be the first defineModel result
  assertCommandResult(resultBatches[0], "defineModel", "defineModelCmd1")

  # The second result batch should be the second defineModel result for the
  # same model
  assertCommandResult(resultBatches[1], "defineModel", "defineModelCmd2")

  # The third batch should be for the two input rows
  batch = resultBatches[2]
  self.assertEqual(batch.modelID, modelID)
  self.assertEqual(len(batch.objects), len(inputRows))

  for inputRow, result in zip(inputRows, batch.objects):
    self.assertIsInstance(result, ModelInferenceResult)
    self.assertEqual(result.status, htmengineerrno.SUCCESS)
    self.assertEqual(result.rowID, inputRow.rowID)
    self.assertIsInstance(result.anomalyScore, float)

  # The fourth batch should be for the "deleteModel"
  assertCommandResult(resultBatches[3], "deleteModel", "deleteModelCmd1")

  # Signal Model Scheduler Service subprocess to shut down and wait for it
  waitResult = dict()

  def runWaiterThread():
    try:
      waitResult["returnCode"] = modelSchedulerSubprocess.wait()
    except:
      # Catch-all is deliberate: record the traceback for the final
      # assertion message, then re-raise unchanged
      _LOGGER.exception("Waiting for modelSchedulerSubprocess failed")
      waitResult["exceptionInfo"] = traceback.format_exc()
      raise

  modelSchedulerSubprocess.terminate()
  waiterThread = threading.Thread(target=runWaiterThread)
  # `daemon` attribute and `is_alive()` replace the deprecated/removed
  # `setDaemon()` and `isAlive()` spellings
  waiterThread.daemon = True
  waiterThread.start()
  waiterThread.join(timeout=30)
  self.assertFalse(waiterThread.is_alive(),
                   msg="modelSchedulerSubprocess did not exit within 30s")

  # Use .get() so that a failed wait surfaces the captured diagnostics in
  # the assertion message instead of an opaque KeyError
  self.assertEqual(waitResult.get("returnCode"), 0, msg=repr(waitResult))
def _auxTestRunModelWithFullThenIncrementalCheckpoints(self,
                                                       classifierEnabled):
  """Drive one model through four model_runner runs and verify each one.

  Run 1 processes the defineModel command plus a first batch of rows; the
  checkpoint attributes must then list two batch IDs and carry no
  input-samples attribute. Runs 2 and 3 each process one more batch of rows;
  the attributes must then list only the latest batch ID and the decoded
  input samples must equal all rows submitted since run 1. Run 4 processes
  the deleteModel command, after which the model's input queue and its
  checkpoint must be gone.

  :param classifierEnabled: value stored under "clEnable" in the model
    params; when true every inference result must carry a dict in
    ``multiStepBestPredictions``, otherwise that field must be None.
  """
  modelID = "foobar"
  archiver = model_runner._ModelArchiver
  checkpointMgr = model_checkpoint_mgr.ModelCheckpointMgr()

  args = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0], minVal=0, maxVal=1000)
  args["modelConfig"]["modelParams"]["clEnable"] = classifierEnabled

  # Schema of the input records: a timestamp field and a float field
  args["inputRecordSchema"] = (
    FieldMetaInfo("c0", FieldMetaType.datetime, FieldMetaSpecial.timestamp),
    FieldMetaInfo("c1", FieldMetaType.float, FieldMetaSpecial.none),
  )

  def submitRows(swapper, rows):
    """Log and submit `rows`; return whatever submitRequests returns."""
    _LOGGER.info("Submitting batch of %d input rows with ids=[%s..%s]...",
                 len(rows), rows[0].rowID, rows[-1].rowID)
    return swapper.submitRequests(modelID=modelID, requests=rows)

  def runModelRunner(batchCount):
    """Run model_runner once; return the result batches it produced."""
    with self._startModelRunnerSubprocess(modelID) as runnerProcess:
      batches = self._consumeResults(numExpectedBatches=batchCount,
                                     timeout=15)
      self._waitForProcessToStopAndCheck(runnerProcess)
    return batches

  def checkResultsQueueEmpty(swapper):
    """Assert that the results message queue holds no messages."""
    with MessageBusConnector() as bus:
      self.assertTrue(bus.isEmpty(swapper._resultsQueueName))

  def checkCommandBatch(cmdBatch, method, commandID):
    """Assert that `cmdBatch` is a single successful command result."""
    self.assertEqual(cmdBatch.modelID, modelID)
    self.assertEqual(len(cmdBatch.objects), 1)
    cmdResult = cmdBatch.objects[0]
    self.assertIsInstance(cmdResult, ModelCommandResult)
    self.assertEqual(cmdResult.method, method)
    self.assertEqual(cmdResult.status, htmengineerrno.SUCCESS)
    self.assertEqual(cmdResult.commandID, commandID)

  def checkInferenceBatch(inferenceBatch, rows):
    """Assert that `inferenceBatch` has one successful result per row."""
    self.assertEqual(inferenceBatch.modelID, modelID)
    self.assertEqual(len(inferenceBatch.objects), len(rows))
    for row, inference in zip(rows, inferenceBatch.objects):
      self.assertIsInstance(inference, ModelInferenceResult)
      self.assertEqual(inference.status, htmengineerrno.SUCCESS)
      self.assertEqual(inference.rowID, row.rowID)
      self.assertIsInstance(inference.anomalyScore, float)
      if not classifierEnabled:
        self.assertIsNone(inference.multiStepBestPredictions)
      else:
        self.assertIsInstance(inference.multiStepBestPredictions, dict)

  def loadCheckpointAttrs():
    """Load the model checkpoint, then return its checkpoint attributes."""
    # The loaded model itself is not inspected; it is discarded right away
    checkpointMgr.load(modelID)
    return checkpointMgr.loadCheckpointAttributes(modelID)

  def checkIncrementalCheckpoint(batchID, expectedRows):
    """Assert the attributes name only `batchID` and hold `expectedRows`."""
    attrs = loadCheckpointAttrs()
    self.assertIn(archiver._BATCH_IDS_CHECKPOINT_ATTR_NAME, attrs,
                  msg=repr(attrs))
    self.assertSequenceEqual(
      attrs[archiver._BATCH_IDS_CHECKPOINT_ATTR_NAME],
      [batchID],
      msg=repr(attrs))
    self.assertIn(archiver._INPUT_SAMPLES_SINCE_CHECKPOINT_ATTR_NAME, attrs,
                  msg=repr(attrs))
    self.assertSequenceEqual(
      archiver._decodeDataSamples(
        attrs[archiver._INPUT_SAMPLES_SINCE_CHECKPOINT_ATTR_NAME]),
      [row.data for row in expectedRows],
      msg=repr(attrs))

  with ModelSwapperInterface() as swapperAPI:
    # Define the model
    _LOGGER.info("Defining the model")
    swapperAPI.defineModel(modelID=modelID, args=args,
                           commandID="defineModelCmd1")

    # Send the first input rows to the model
    inputRows = [
      ModelInputRow(rowID="rowfoo",
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 0), 5.3]),
      ModelInputRow(rowID="rowbar",
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 15), 2.4]),
    ]
    submitRows(swapperAPI, inputRows)

    # Run model_runner and collect results: one batch for the defineModel
    # command, one for the input rows
    resultBatches = runModelRunner(2)
    checkResultsQueueEmpty(swapperAPI)
    self.assertEqual(len(resultBatches), 2, repr(resultBatches))

    checkCommandBatch(resultBatches[0], "defineModel", "defineModelCmd1")
    checkInferenceBatch(resultBatches[1], inputRows)

    # Verify model checkpoint: two batch IDs recorded, no input samples
    attrs = loadCheckpointAttrs()
    self.assertIn(archiver._BATCH_IDS_CHECKPOINT_ATTR_NAME, attrs,
                  msg=repr(attrs))
    self.assertEqual(
      len(attrs[archiver._BATCH_IDS_CHECKPOINT_ATTR_NAME]), 2,
      msg=repr(attrs))
    self.assertNotIn(archiver._INPUT_SAMPLES_SINCE_CHECKPOINT_ATTR_NAME,
                     attrs, msg=repr(attrs))

    # Now, check incremental checkpointing
    inputRows2 = [
      ModelInputRow(rowID=2,
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 20), 2.7]),
      ModelInputRow(rowID=3,
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 25), 3.9]),
    ]
    inputBatchID = submitRows(swapperAPI, inputRows2)

    resultBatches = runModelRunner(1)
    checkResultsQueueEmpty(swapperAPI)
    checkInferenceBatch(resultBatches[0], inputRows2)
    checkIncrementalCheckpoint(inputBatchID, inputRows2)

    # Final run with incremental checkpointing
    inputRows3 = [
      ModelInputRow(rowID=4,
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 30), 4.7]),
      ModelInputRow(rowID=5,
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 35), 5.9]),
    ]
    inputBatchID = submitRows(swapperAPI, inputRows3)

    resultBatches = runModelRunner(1)
    checkResultsQueueEmpty(swapperAPI)
    checkInferenceBatch(resultBatches[0], inputRows3)
    checkIncrementalCheckpoint(inputBatchID,
                               itertools.chain(inputRows2, inputRows3))

    # Delete the model
    _LOGGER.info("Deleting the model=%s", modelID)
    swapperAPI.deleteModel(modelID=modelID, commandID="deleteModelCmd1")

    resultBatches = runModelRunner(1)
    self.assertEqual(len(resultBatches), 1, repr(resultBatches))

    # The only result batch should be the deleteModel result
    checkCommandBatch(resultBatches[0], "deleteModel", "deleteModelCmd1")

    with MessageBusConnector() as bus:
      self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

      # The model input queue should be deleted now
      self.assertFalse(
        bus.isMessageQeueuePresent(
          swapperAPI._getModelInputQName(modelID=modelID)))

    # The model checkpoint should be gone too
    for checkpointOp in (checkpointMgr.load,
                         checkpointMgr.loadModelDefinition,
                         checkpointMgr.loadCheckpointAttributes,
                         checkpointMgr.remove):
      with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
        checkpointOp(modelID)
import nupic
import json
import csv
import pprint
import matplotlib.pyplot as plt
from datetime import datetime as dt
from nupic.encoders.date import DateEncoder
from nupic.encoders.random_distributed_scalar import RandomDistributedScalarEncoder
# NOTE: repeats the DateEncoder import above; kept as in the original
from nupic.encoders.date import DateEncoder
from nupic.algorithms.spatial_pooler import SpatialPooler
from nupic.algorithms.temporal_memory import TemporalMemory
from nupic.algorithms.anomaly import Anomaly
from nupic.algorithms.anomaly_likelihood import AnomalyLikelihood
from nupic.frameworks.opf.common_models.cluster_params import getScalarMetricWithTimeOfDayAnomalyParams
from nupic.frameworks.opf.model_factory import ModelFactory
import numpy as np
import pandas as pd
import os
import time

# Pretty-printer used to dump the generated parameter structure
pp = pprint.PrettyPrinter(indent=1)

# Build the anomaly-model parameters for a metric ranging over [-5, 5],
# requesting the "cpp" implementation, and print them for inspection
t = getScalarMetricWithTimeOfDayAnomalyParams(
  metricData=[0],
  minVal=-5,
  maxVal=5,
  tmImplementation="cpp")
pp.pprint(t)
def _auxTestRunModelWithFullThenIncrementalCheckpoints(
    self, classifierEnabled):
  """Drive one model through four model_runner runs and verify each one.

  Run 1 processes the defineModel command plus a first batch of rows; the
  checkpoint attributes must then list two batch IDs and carry no
  input-samples attribute. Runs 2 and 3 each process one more batch of rows;
  the attributes must then list only the latest batch ID and the decoded
  input samples must equal all rows submitted since run 1. Run 4 processes
  the deleteModel command, after which the model's input queue and its
  checkpoint must be gone.

  :param classifierEnabled: value stored under "clEnable" in the model
    params; when true every inference result must carry a dict in
    ``multiStepBestPredictions``, otherwise that field must be None.
  """
  modelID = "foobar"
  archiver = model_runner._ModelArchiver
  checkpointMgr = model_checkpoint_mgr.ModelCheckpointMgr()

  args = getScalarMetricWithTimeOfDayAnomalyParams(
    metricData=[0], minVal=0, maxVal=1000)
  args["modelConfig"]["modelParams"]["clEnable"] = classifierEnabled

  # Schema of the input records: a timestamp field and a float field
  args["inputRecordSchema"] = (
    FieldMetaInfo("c0", FieldMetaType.datetime, FieldMetaSpecial.timestamp),
    FieldMetaInfo("c1", FieldMetaType.float, FieldMetaSpecial.none),
  )

  def submitRows(swapper, rows):
    """Log and submit `rows`; return whatever submitRequests returns."""
    _LOGGER.info("Submitting batch of %d input rows with ids=[%s..%s]...",
                 len(rows), rows[0].rowID, rows[-1].rowID)
    return swapper.submitRequests(modelID=modelID, requests=rows)

  def runModelRunner(batchCount):
    """Run model_runner once; return the result batches it produced."""
    with self._startModelRunnerSubprocess(modelID) as runnerProcess:
      batches = self._consumeResults(numExpectedBatches=batchCount,
                                     timeout=15)
      self._waitForProcessToStopAndCheck(runnerProcess)
    return batches

  def checkResultsQueueEmpty(swapper):
    """Assert that the results message queue holds no messages."""
    with MessageBusConnector() as bus:
      self.assertTrue(bus.isEmpty(swapper._resultsQueueName))

  def checkCommandBatch(cmdBatch, method, commandID):
    """Assert that `cmdBatch` is a single successful command result."""
    self.assertEqual(cmdBatch.modelID, modelID)
    self.assertEqual(len(cmdBatch.objects), 1)
    cmdResult = cmdBatch.objects[0]
    self.assertIsInstance(cmdResult, ModelCommandResult)
    self.assertEqual(cmdResult.method, method)
    self.assertEqual(cmdResult.status, htmengineerrno.SUCCESS)
    self.assertEqual(cmdResult.commandID, commandID)

  def checkInferenceBatch(inferenceBatch, rows):
    """Assert that `inferenceBatch` has one successful result per row."""
    self.assertEqual(inferenceBatch.modelID, modelID)
    self.assertEqual(len(inferenceBatch.objects), len(rows))
    for row, inference in zip(rows, inferenceBatch.objects):
      self.assertIsInstance(inference, ModelInferenceResult)
      self.assertEqual(inference.status, htmengineerrno.SUCCESS)
      self.assertEqual(inference.rowID, row.rowID)
      self.assertIsInstance(inference.anomalyScore, float)
      if not classifierEnabled:
        self.assertIsNone(inference.multiStepBestPredictions)
      else:
        self.assertIsInstance(inference.multiStepBestPredictions, dict)

  def loadCheckpointAttrs():
    """Load the model checkpoint, then return its checkpoint attributes."""
    # The loaded model itself is not inspected; it is discarded right away
    checkpointMgr.load(modelID)
    return checkpointMgr.loadCheckpointAttributes(modelID)

  def checkIncrementalCheckpoint(batchID, expectedRows):
    """Assert the attributes name only `batchID` and hold `expectedRows`."""
    attrs = loadCheckpointAttrs()
    self.assertIn(archiver._BATCH_IDS_CHECKPOINT_ATTR_NAME, attrs,
                  msg=repr(attrs))
    self.assertSequenceEqual(
      attrs[archiver._BATCH_IDS_CHECKPOINT_ATTR_NAME],
      [batchID],
      msg=repr(attrs))
    self.assertIn(archiver._INPUT_SAMPLES_SINCE_CHECKPOINT_ATTR_NAME, attrs,
                  msg=repr(attrs))
    self.assertSequenceEqual(
      archiver._decodeDataSamples(
        attrs[archiver._INPUT_SAMPLES_SINCE_CHECKPOINT_ATTR_NAME]),
      [row.data for row in expectedRows],
      msg=repr(attrs))

  with ModelSwapperInterface() as swapperAPI:
    # Define the model
    _LOGGER.info("Defining the model")
    swapperAPI.defineModel(modelID=modelID, args=args,
                           commandID="defineModelCmd1")

    # Send the first input rows to the model
    inputRows = [
      ModelInputRow(rowID="rowfoo",
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 0), 5.3]),
      ModelInputRow(rowID="rowbar",
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 15), 2.4]),
    ]
    submitRows(swapperAPI, inputRows)

    # Run model_runner and collect results: one batch for the defineModel
    # command, one for the input rows
    resultBatches = runModelRunner(2)
    checkResultsQueueEmpty(swapperAPI)
    self.assertEqual(len(resultBatches), 2, repr(resultBatches))

    checkCommandBatch(resultBatches[0], "defineModel", "defineModelCmd1")
    checkInferenceBatch(resultBatches[1], inputRows)

    # Verify model checkpoint: two batch IDs recorded, no input samples
    attrs = loadCheckpointAttrs()
    self.assertIn(archiver._BATCH_IDS_CHECKPOINT_ATTR_NAME, attrs,
                  msg=repr(attrs))
    self.assertEqual(
      len(attrs[archiver._BATCH_IDS_CHECKPOINT_ATTR_NAME]), 2,
      msg=repr(attrs))
    self.assertNotIn(archiver._INPUT_SAMPLES_SINCE_CHECKPOINT_ATTR_NAME,
                     attrs, msg=repr(attrs))

    # Now, check incremental checkpointing
    inputRows2 = [
      ModelInputRow(rowID=2,
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 20), 2.7]),
      ModelInputRow(rowID=3,
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 25), 3.9]),
    ]
    inputBatchID = submitRows(swapperAPI, inputRows2)

    resultBatches = runModelRunner(1)
    checkResultsQueueEmpty(swapperAPI)
    checkInferenceBatch(resultBatches[0], inputRows2)
    checkIncrementalCheckpoint(inputBatchID, inputRows2)

    # Final run with incremental checkpointing
    inputRows3 = [
      ModelInputRow(rowID=4,
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 30), 4.7]),
      ModelInputRow(rowID=5,
                    data=[datetime.datetime(2014, 5, 23, 8, 13, 35), 5.9]),
    ]
    inputBatchID = submitRows(swapperAPI, inputRows3)

    resultBatches = runModelRunner(1)
    checkResultsQueueEmpty(swapperAPI)
    checkInferenceBatch(resultBatches[0], inputRows3)
    checkIncrementalCheckpoint(inputBatchID,
                               itertools.chain(inputRows2, inputRows3))

    # Delete the model
    _LOGGER.info("Deleting the model=%s", modelID)
    swapperAPI.deleteModel(modelID=modelID, commandID="deleteModelCmd1")

    resultBatches = runModelRunner(1)
    self.assertEqual(len(resultBatches), 1, repr(resultBatches))

    # The only result batch should be the deleteModel result
    checkCommandBatch(resultBatches[0], "deleteModel", "deleteModelCmd1")

    with MessageBusConnector() as bus:
      self.assertTrue(bus.isEmpty(swapperAPI._resultsQueueName))

      # The model input queue should be deleted now
      self.assertFalse(
        bus.isMessageQeueuePresent(
          swapperAPI._getModelInputQName(modelID=modelID)))

    # The model checkpoint should be gone too
    for checkpointOp in (checkpointMgr.load,
                         checkpointMgr.loadModelDefinition,
                         checkpointMgr.loadCheckpointAttributes,
                         checkpointMgr.remove):
      with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
        checkpointOp(modelID)