def runDataset(dataset):
    """Run anomaly detection over the selected dataset.

    :param dataset: integer selector -- 0 for the machine-temperature
        system-failure data, 1 for the Twitter GOOG volume data. Any other
        value is rejected.
    """
    # set model parameters, csv path, and output csv/plot
    if dataset == 0:
        model_par = machine_model_params
        csv_path = "./data/machine_temperature_system_failure.csv"
        nupic_output.WINDOW = 22694
        nupic_output.ANOMALY_THRESHOLD = 0.97
        outputCSVFile = nupic_output.NuPICFileOutput(
            "Machine_Temp_Sys_Failure_OUTPUT_ANOMALY_CSV")
        outputPlotFile = nupic_output.NuPICPlotOutput(
            "Machine_Temp_Sys_Failure_OUTPUT_ANOMALY_PLOT")
    elif dataset == 1:
        model_par = twitter_model_params
        csv_path = "./data/Twitter_volume_GOOG.csv"
        print(nupic_output.WINDOW)
        nupic_output.WINDOW = 15841
        print(nupic_output.WINDOW)
        outputCSVFile = nupic_output.NuPICFileOutput(
            "Twitter_Volume_Google_OUTPUT_ANOMALY_CSV")
        outputPlotFile = nupic_output.NuPICPlotOutput(
            "Twitter_Volume_Google_OUTPUT_ANOMALY_PLOT")
    else:
        # BUG FIX: the original assigned `model_params = None` (a typo for
        # `model_par`) and then fell through to createModel(model_par),
        # which raised NameError. Bail out explicitly instead.
        print("No specified dataset, error will occur")
        return

    # create model
    model = createModel(model_par)
    # run model
    runModel(model, csv_path, outputCSVFile, outputPlotFile)
def runModel(model, inputFilePath): inputFile = open(inputFilePath, "rb") csvReader = csv.reader(inputFile) # skip header rows csvReader.next() csvReader.next() csvReader.next() output = nupic_anomaly_output.NuPICFileOutput("weight_output") shifter = InferenceShifter() counter = 0 for row in csvReader: counter += 1 if(counter % 20 == 0): print "Read %i lines..." % counter weight_date = datetime.datetime.strptime("2013-01-08 12:00:00", DATE_FORMAT) weight = row[2] result = model.run({ "weight": weight, "weight_date": weight_date }) result = shifter.shift(result) prediction = str(result.inferences["multiStepBestPredictions"][1]) anomalyScore = int(result.inferences["anomalyScore"]) output.write(weight_date, weight, prediction, anomalyScore)
def runIoThroughNupic(inputData, model, gymName, plot, load):
    """
    Handles looping over the input data and passing each row into the given model
    object, as well as extracting the result object and passing it into an output
    handler.
    :param inputData: file path to input data CSV
    :param model: OPF Model object
    :param gymName: Gym name, used for output handler naming
    :param plot: Whether to use matplotlib or not. If false, uses file output.
    :param load: if True the model was loaded from disk, so it is NOT re-saved;
        if False the trained model is persisted to MODEL_DIR at the end.
    :returns: the model object (saved to MODEL_DIR when load is False)
    """
    inputFile = open(inputData, "rb")
    csvReader = csv.reader(inputFile)
    # skip header rows
    csvReader.next()
    csvReader.next()
    csvReader.next()
    shifter = InferenceShifter()
    if plot:
        output = nupic_anomaly_output.NuPICPlotOutput(gymName)
    else:
        output = nupic_anomaly_output.NuPICFileOutput(gymName)
    counter = 0
    # using dummy time to debug
    # NOTE(review): the first data row is consumed here only to seed the
    # synthetic timestamp -- that row is never run through the model.
    timestamp = datetime.datetime.strptime(csvReader.next()[0], DATE_FORMAT)
    print("DEBUG_PRINT: initiali time", timestamp)
    for row in csvReader:
        counter += 1
        if (counter % 100 == 0):
            print "Read %i lines..." % counter
        # synthesize a fixed 10 ms cadence rather than trusting the file's
        # timestamps; the real parse is kept commented out below
        timestamp = timestamp + datetime.timedelta(microseconds=10000)
        #timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
        consumption = float(row[2])
        rawValue = float(row[1])
        result = model.run({
            "timestamp": timestamp,
            "wavelet_value": consumption
        })
        if plot:
            # align predictions with actuals for plotting
            result = shifter.shift(result)
        prediction = result.inferences["multiStepBestPredictions"][1]
        anomalyScore = result.inferences["anomalyScore"]
        output.write(timestamp, consumption, prediction, anomalyScore, rawValue)
    if not load:
        print("saving model for MODEL_DIR")
        model.save(MODEL_DIR)
    inputFile.close()
    output.close()
    return model
def runHarddriveAnomaly(plot):
    """Run anomaly detection over the hard-drive SMART test data.

    :param plot: if True, use matplotlib output (and shift inferences so
        predictions line up with actuals); otherwise use file output.
    """
    shifter = InferenceShifter()
    if plot:
        output = nupic_anomaly_output.NuPICPlotOutput('Harddrive Learning')
    else:
        output = nupic_anomaly_output.NuPICFileOutput('Harddrive Learning')
    _LOGGER.info('start with anomaly detection...')
    model = getModel('good')
    model.enableInference({'predictedField': 'class'})
    _LOGGER.info('read data file...')
    data = pd.read_csv('harddrive-smart-data-test.csv')
    dictionary = data.transpose().to_dict().values()
    # BUG FIX: the original re-opened _OUTPUT_PATH with the bogus mode "wa"
    # and re-wrote the header on EVERY iteration, clobbering earlier rows.
    # Open the writer once, write the header once, and close when done.
    outputFile = open(_OUTPUT_PATH, "w")
    csvWriter = csv.writer(outputFile)
    csvWriter.writerow(["class", "anomaly_score"])
    for row in dictionary:
        result = model.run(row)
        if plot:
            # align predictions with actuals for plotting
            result = shifter.shift(result)
        prediction = result.inferences["multiStepBestPredictions"][1]
        anomalyScore = result.inferences['anomalyScore']
        output.write('', result.rawInput["class"], prediction, anomalyScore)
        csvWriter.writerow([row["class"], anomalyScore])
        if anomalyScore > _ANOMALY_THRESHOLD:
            _LOGGER.info("Anomaly detected at [%s]. Anomaly score: %f.",
                         result.rawInput["class"], anomalyScore)
    outputFile.close()
def runIoThroughNupic(inputData, model, metric, sensor, patientId, plot): """ Handles looping over the input data and passing each row into the given model object, as well as extracting the result object and passing it into an output handler. :param inputData: file path to input data CSV :param model: OPF Model object :param csvName: CSV name, used for output handler naming :param plot: Whether to use matplotlib or not. If false, uses file output. """ inputFile = open(inputData, "rb") csvReader = csv.reader(inputFile.read().splitlines()) # skip header rows csvReader.next() csvReader.next() csvReader.next() csvName = "%s_%s_%s" % (metric, sensor, patientId) print "running model with model_params '%s'" % csvName shifter = InferenceShifter() if plot: output = nupic_anomaly_output.NuPICPlotOutput(csvName) else: if not os.path.exists(MODEL_RESULTS_DIR): os.makedirs(MODEL_RESULTS_DIR) output = nupic_anomaly_output.NuPICFileOutput("%s/%s" % (MODEL_RESULTS_DIR, csvName)) counter = 0 for row in csvReader: counter += 1 if (counter % 100 == 0): print "Read %i lines..." % counter metric_value = float(row[0]) result = model.run({ "metric_value": metric_value }) if plot: result = shifter.shift(result) prediction = result.inferences["multiStepBestPredictions"][0] anomalyScore = result.inferences["anomalyScore"] output.write(counter, metric_value, prediction, anomalyScore) output.close() inputFile.close()
def runModel(model, inputFilePath):
    """Run up to ~1000 rows of the disease CSV through the model, tracking
    Malaria prediction/actual counts and overall hit/miss totals.

    :param model: OPF Model object
    :param inputFilePath: path to the disease CSV (three NuPIC header rows)
    """
    inputFile = open(inputFilePath, "rb")
    csvReader = csv.reader(inputFile)
    # skip header rows
    csvReader.next()
    csvReader.next()
    csvReader.next()
    output = nupic_anomaly_output.NuPICFileOutput("disease_forecast")
    shifter = InferenceShifter()
    counter = 0
    actualCount = 0
    predictCount = 0
    miss = 0
    hit = 0
    row_count = 0
    for row in csvReader:
        # hard cap: stop after 1000 data rows
        if row_count > 1000:
            break
        counter += 1
        if (counter % 10 == 0):
            print "Read %i lines..." % counter
        disease_date = datetime.datetime.strptime(row[5], DATE_FORMAT)
        disease_name = str(row[2])
        result = model.run({
            "disease_name": disease_name,
            "disease_date": disease_date
        })
        result = shifter.shift(result)
        # NOTE(review): a 20-steps-ahead prediction -- confirm the model
        # params actually configure steps=[20].
        prediction = str(result.inferences["multiStepBestPredictions"][20])
        # int() truncation: only an anomaly score of exactly 1 survives
        anomalyScore = int(result.inferences["anomalyScore"])
        output.write(disease_date, disease_name, prediction, anomalyScore)
        if prediction == "Malaria":
            predictCount += 1
        # prediction is always a str here, so the != None guards never fail
        if disease_name == "Malaria" and prediction != None:
            actualCount += 1
        if prediction != None:
            if disease_name == prediction:
                hit += 1
            else:
                miss += 1
        row_count += 1
        print counter, row[0], "Actual: ", disease_name, "Predicted: ", prediction, "------", anomalyScore
    print "\n Number of actuals: ", actualCount, " \n Number of predictions: ", predictCount
    # NOTE(review): "miss - 20" looks like an ad-hoc correction (perhaps for
    # a warm-up window) -- verify the intent before relying on this number.
    print "\n hits: ", hit, "\n misses: ", miss - 20
def runIoThroughNupic(inputData, model, app, plot): """ Handles looping over the input data and passing each row into the given model object, as well as extracting the result object and passing it into an output handler. :param inputData: file path to input data CSV :param model: OPF Model object :param gymName: Gym name, used for output handler naming :param plot: Whether to use matplotlib or not. If false, uses file output. """ inputFile = open(inputData, "rb") csvReader = csv.reader(inputFile) # skip header rows csvReader.next() csvReader.next() csvReader.next() shifter = InferenceShifter() if plot: output = nupic_anomaly_output.NuPICPlotOutput(app) else: output = nupic_anomaly_output.NuPICFileOutput(app) counter = 0 for row in csvReader: counter += 1 if (counter % 100 == 0): print "Read %i lines..." % counter #timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT) #consumption = float(row[1]) packets = int(row[8]) bytes = int(row[9]) duration = float(row[10]) result = model.run({ "packets": packets, "bytes": bytes, "duration": duration }) #print result #prediction = result.inferences["multiStepBestPredictions"][1] anomalyScore = result.inferences["anomalyScore"] output.plot(counter, anomalyScore) #output.write(duration,packets,0, anomalyScore) inputFile.close() output.close()
def runIoThroughNupic(inputData, model, gymName, plot): """ Handles looping over the input data and passing each row into the given model object, as well as extracting the result object and passing it into an output handler. :param inputData: file path to input data CSV :param model: OPF Model object :param gymName: Gym name, used for output handler naming :param plot: Whether to use matplotlib or not. If false, uses file output. """ # t0 = time() # # model = load(open('model.pkl', 'rb')) # model = ModelFactory.loadFromCheckpoint('/home/magarwal/logoDetective/core/anomalyDetection/nupic/nupic/examples/opf/clients/hotgym/anomaly/one_gym/model_save/model.pkl') # print 'time taken in loading model = ',time()-t0 inputFile = open(inputData, "rb") csvReader = csv.reader(inputFile) # skip header rows csvReader.next() csvReader.next() csvReader.next() shifter = InferenceShifter() if plot: output = nupic_anomaly_output.NuPICPlotOutput(gymName) else: output = nupic_anomaly_output.NuPICFileOutput(gymName) counter = 0 for row in csvReader: counter += 1 if (counter % 1000 == 0): print "Read %i lines..." % counter timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT) feat = float(row[1]) result = model.run({"time_start": timestamp, FEAT_NAME: feat}) if plot: result = shifter.shift(result) # print 'result = ',result prediction = result.inferences["multiStepBestPredictions"][1] anomalyScore = result.inferences["anomalyScore"] output.write(timestamp, feat, prediction, anomalyScore) inputFile.close() output.close()
def runIoThroughNupic(inputData, model, InputName, plot): """ Handles looping over the input data and passing each row into the given model object, as well as extracting the result object and passing it into an output handler. :param inputData: file path to input data CSV :param model: OPF Model object :param InputName: Gym name, used for output handler naming :param plot: Whether to use matplotlib or not. If false, uses file output. """ inputFile = open(inputData, "rb") csvReader = csv.reader(inputFile) # skip header rows csvReader.next() csvReader.next() csvReader.next() shifter = InferenceShifter() if plot: output = nupic_anomaly_output.NuPICPlotOutput(InputName) else: output = nupic_anomaly_output.NuPICFileOutput(InputName) counter = 0 for row in csvReader: counter += 1 if (counter % 100 == 0): print "Read %i lines..." % counter timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT) PredFldNm = float(row[1]) result = model.run({ "c0": timestamp, "c1": PredFldNm }) if plot: result = shifter.shift(result) prediction = result.inferences["multiStepBestPredictions"][1] anomalyScore = result.inferences["anomalyScore"] output.write(timestamp, PredFldNm, prediction, anomalyScore) inputFile.close() output.close()
def runIoThroughNupic(inputData, model, modelName, plot): inputFile = open(inputData, "rb") csvReader = csv.reader(inputFile) # skip header rows headers = csvReader.next() csvReader.next() csvReader.next() shifter = InferenceShifter() if plot: output = nupic_anomaly_output.NuPICPlotOutput(modelName) else: output = nupic_anomaly_output.NuPICFileOutput(modelName) metricsManager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(), model.getInferenceType()) counter = 0 for row in csvReader: counter += 1 timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT) consumption = float(row[1]) result = model.run({"Time": timestamp, PREDICTED_FIELD: consumption}) result.metrics = metricsManager.update(result) if counter % 100 == 0: print "Read %i lines..." % counter print( "After %i records, 1-step altMAPE=%f", counter, result.metrics["multiStepBestPredictions:multiStep:" "errorMetric='altMAPE':steps=1:window=1000:" "field=%s" % PREDICTED_FIELD]) if plot: result = shifter.shift(result) prediction = result.inferences["multiStepBestPredictions"][1] anomalyScore = result.inferences["anomalyScore"] output.write(timestamp, consumption, prediction, anomalyScore) inputFile.close() output.close()
def run_io_through_nupic(input_data, model, metric_name, plot): input_file = open(input_data, "rb") csv_reader = csv.reader(input_file) # skip header rows csv_reader.next() csv_reader.next() csv_reader.next() shifter = InferenceShifter if plot: output = nupic_anomaly_output.NuPICPlotOutput(metric_name) else: output = nupic_anomaly_output.NuPICFileOutput(metric_name) counter = 0 for row in csv_reader: counter += 1 if (counter % 100 == 0): print "Read %i lines..." % counter timestamp = datetime.strptime(row[1], DATE_FORMAT) value = float(row[0]) result = model.run({ "timestamp": timestamp, "value": value }) if plot: result = shifter.shift(result) print "Line %d" % counter prediction = result.inferences["multiStepBestPredictions"][1] anomaly_score = result.inferences["anomalyScore"] output.write(timestamp, value, prediction, anomaly_score) input_file.close() output.close()
model.enableInference({"predictedField": "value"}) # %% DATA_DIR = "." DATE_FORMAT = "%Y-%m-%d %H:%M:%S" inputData = "%s/%s.csv" % (DATA_DIR, INPUT_FILE.replace(" ", "_")) inputFile = open(inputData, "rb") csvReader = csv.reader(inputFile) # skip header rows csvReader.next() csvReader.next() csvReader.next() shifter = InferenceShifter() output = nupic_anomaly_output.NuPICFileOutput(INPUT_FILE) # %% counter = 0 valueOut = [] timestampOut = [] predictionOut = [] anomalyScoreOut = [] claLearningPeriod = None learningPeriod = 288 estimationSamples = 100 historicWindowSize = 8640 reestimationPeriod = 100 iteration = 0 _distribution = None
def runModel(model, inputFilePath, run_count):
    """Run the vaccination CSV through the model, tallying per-vaccine
    actual vs. predicted counts and appending accuracy rows to a CSV.

    :param model: OPF Model object
    :param inputFilePath: path to the vaccination CSV (three header rows)
    :param run_count: run number written as a section marker in the
        accuracy CSV
    """
    inputFile = open(inputFilePath, "rb")
    csvReader = csv.reader(inputFile)
    # skip header rows
    csvReader.next()
    csvReader.next()
    csvReader.next()
    output = nupic_anomaly_output.NuPICFileOutput("Vaccination_present")
    shifter = InferenceShifter()
    counter = 0
    # NOTE(review): actualCount/predictCount are never incremented in this
    # function, so the final summary always prints 0 for both.
    actualCount = 0
    predictCount = 0
    miss = 0
    hit = 0
    # per-vaccine tallies, dumped and reset every 20 rows
    vaccine_actuals = {}
    vaccine_predictions = {}
    # accuracy_file = open("vaccine_accuracy.txt", "w")
    com_file_name = "vaccination_accuracy_present.csv"
    accuracy_dir = "vaccine_accuracy_dir/"
    # append mode: successive runs accumulate in the same accuracy file
    vaccine_accuracy = csv.writer(open(accuracy_dir + com_file_name, "a"),
                                  delimiter=",")
    vaccine_accuracy.writerow(['Run number ' + str(run_count)])
    for row in csvReader:
        counter += 1
        if (counter % 20 == 0):
            # every 20 rows: write the window's tallies and reset them
            # start miss and hit counts
            miss = 0
            hit = 0
            file_row = []
            ## writing to file
            # file_line = "==============================\n"
            # for key, value in vaccine_actuals.iteritems():
            #     file_line += key+" : "+str(value)+" "+str(vaccine_predictions.get(key, 0))+"\n"
            for key in vaccine_actuals:
                # actual count then predicted count, 0 when missing/None
                if vaccine_actuals[key] != None:
                    file_row.append(str(vaccine_actuals[key]))
                else:
                    file_row.append(str(0))
                if vaccine_predictions.get(key) != None:
                    file_row.append(str(vaccine_predictions.get(key)))
                else:
                    file_row.append(str(0))
                # vaccine_accuracy.writerow([key, str(value), str(vaccine_predictions.get(key, 0))])
            vaccine_accuracy.writerow(file_row)
            # write line to file
            #accuracy_file.write(file_line)
            file_row = []
            # zero the tallies for the next 20-row window
            for key in vaccine_actuals:
                vaccine_actuals[key] = 0
            for key in vaccine_predictions:
                vaccine_predictions[key] = 0
            print "Read %i lines..." % counter
        vaccine_date = datetime.datetime.strptime(row[2], DATE_FORMAT)
        vaccine_name = str(row[1])
        result = model.run({
            "vaccine_date": vaccine_date,
            "vaccine_name": vaccine_name
        })
        result = shifter.shift(result)
        # NOTE(review): a 20-steps-ahead prediction -- confirm the model
        # params actually configure steps=[20].
        prediction = str(result.inferences["multiStepBestPredictions"][20])
        # int() truncation: only an anomaly score of exactly 1 survives
        anomalyScore = int(result.inferences["anomalyScore"])
        # abnormal = anomaly_likelihood.AnomalyLikelihood()
        """anomalyLikelihood = abnormal.anomalyProbability( vaccine_name, anomalyScore, vaccine_date )"""
        output.write(vaccine_date, vaccine_name, prediction, anomalyScore)
        # if vaccine not in actuals
        if vaccine_actuals.get(vaccine_name) == None:
            # print "got you now", vaccine_name
            vaccine_actuals.update({vaccine_name: 1})
        # update value if actual already exitsts
        else:
            vaccine_actuals[vaccine_name] += 1
        # if vaccine not in predictions
        if vaccine_predictions.get(prediction) == None:
            vaccine_predictions.update({prediction: 1})
        # update value if prediction alreadye exists
        else:
            vaccine_predictions[prediction] += 1
        if prediction == vaccine_name:
            hit += 1
        else:
            miss += 1
        """print counter, "community member_id: ", row[0], "Actual: ", vaccine_name, "Predicted: ", prediction, "------", anomalyScore, "====>> ", anomalyLikelihood"""
        print counter, "Actual: ", vaccine_name, "Predicted: ", prediction, " ===== ", anomalyScore
        if anomalyScore == 1:
            # loud console banner when a (truncated) anomaly score of 1 occurs
            print "**************************"
            print "**************************"
            print "**************************"
            print "**** ****"
            print "**** ****"
            print "**** ****"
            print "**** ", vaccine_name, " ****"
            print "**** ****"
            print "**** ****"
            print "**** ****"
            print "**************************"
            print "**************************"
            print "**************************"
    # close accuracy file
    #accuracy_file.close()
    print "\n Number of actuals: ", actualCount, " \n Number of predictions: ", predictCount
    print "\n hits: ", hit, "\n misses: ", miss
    print "List of actuals"
    print vaccine_actuals
    print "List of predictions"
    print vaccine_predictions
def runModel(model, inputFilePath, model_count):
    """Run the disease CSV through the model, recording Malaria/ARI actual
    vs. predicted counts every 20 rows into a per-model accuracy CSV.

    :param model: OPF Model object
    :param inputFilePath: path to the disease CSV (three header rows)
    :param model_count: suffix for the accuracy CSV file name
    """
    inputFile = open(inputFilePath, "rb")
    csvReader = csv.reader(inputFile)
    # skip header rows
    csvReader.next()
    csvReader.next()
    csvReader.next()
    output = nupic_anomaly_output.NuPICFileOutput("disease_forecast")
    # new , store prediction vs actual
    results_file = csv.writer(open("forecast_accuracy_com4_f"+str(model_count)+".csv", "w"), delimiter=",")
    results_file.writerow(['ARI_actual', 'ARI_predicted', 'Malaria_actual', 'Malaria_predicted'])
    shifter = InferenceShifter()
    counter = 0
    # Malaria tallies
    actualCount = 0
    predictCount = 0
    # ARI tallies
    predictCountA = 0
    actualCountA = 0
    miss = 0
    hit = 0
    for row in csvReader:
        counter += 1
        if(counter % 20 == 0):
            # every 20 rows: report and persist the window's counts, then reset
            print "Read %i lines..." % counter
            print"\n***Malaria***\n Number of actuals: ", actualCount," \n Number of predictions: ", predictCount
            print "\n***ARI***","\nNumber of actuals:", actualCountA, "\nNumber of predictions: ", predictCountA
            # print stats to file
            results_file.writerow([actualCountA, predictCountA, actualCount, predictCount])
            actualCount = 0
            actualCountA = 0
            predictCount = 0
            predictCountA = 0
        disease_date = datetime.datetime.strptime(row[5], DATE_FORMAT)
        disease_name = str(row[2])
        result = model.run({
            "disease_name": disease_name,
            "disease_date": disease_date
        })
        result = shifter.shift(result)
        # NOTE(review): a 20-steps-ahead prediction -- confirm the model
        # params actually configure steps=[20].
        prediction = str(result.inferences["multiStepBestPredictions"][20])
        # int() truncation: only an anomaly score of exactly 1 survives
        anomalyScore = int(result.inferences["anomalyScore"])
        output.write(disease_date, disease_name, prediction, anomalyScore)
        if prediction == "Malaria":
            predictCount += 1
        elif prediction == "ARI":
            predictCountA += 1
        # prediction is always a str here, so the != None guards never fail
        if disease_name == "Malaria" and prediction != None:
            actualCount += 1
        elif disease_name == "ARI" and prediction != None:
            actualCountA += 1
        if prediction != None:
            if disease_name == prediction:
                hit += 1
            else:
                miss += 1
        print counter, row[0], "Actual: ", disease_name, "Predicted: ", prediction, "------", anomalyScore
    print"\n***Malaria***\n Number of actuals: ", actualCount," \n Number of predictions: ", predictCount
    # NOTE(review): "miss-20" looks like an ad-hoc correction (perhaps for a
    # warm-up window) -- verify the intent before relying on this number.
    print "\n***ARI***","\nNumber of actuals:", actualCountA, "\nNumber of predictions: ", predictCountA,"\nhits: ", hit,"\n misses: ", miss-20