Exemplo n.º 1
0
def runDataset(dataset):
    """
    Run anomaly detection over one of the known datasets.

    Picks the model parameters, the input CSV path and the CSV/plot output
    handlers for the requested dataset, adjusts the module-level settings of
    ``nupic_output``, then creates the model and runs it over the CSV.

    :param dataset: dataset selector; 0 selects the machine-temperature data,
        1 selects the Twitter-volume (GOOG) data.
    :raises ValueError: if ``dataset`` is neither 0 nor 1.
    """

    # set model parameters, csv path, and output csv/plot
    if dataset == 0:
        model_par = machine_model_params
        csv_path = "./data/machine_temperature_system_failure.csv"
        nupic_output.WINDOW = 22694
        nupic_output.ANOMALY_THRESHOLD = 0.97
        outputCSVFile = nupic_output.NuPICFileOutput(
            "Machine_Temp_Sys_Failure_OUTPUT_ANOMALY_CSV")
        outputPlotFile = nupic_output.NuPICPlotOutput(
            "Machine_Temp_Sys_Failure_OUTPUT_ANOMALY_PLOT")
    elif dataset == 1:
        model_par = twitter_model_params
        csv_path = "./data/Twitter_volume_GOOG.csv"
        # NOTE(review): unlike dataset 0, ANOMALY_THRESHOLD is not set here, so
        # whatever value nupic_output currently holds is used - confirm.
        print(nupic_output.WINDOW)
        nupic_output.WINDOW = 15841
        print(nupic_output.WINDOW)
        outputCSVFile = nupic_output.NuPICFileOutput(
            "Twitter_Volume_Google_OUTPUT_ANOMALY_CSV")
        outputPlotFile = nupic_output.NuPICPlotOutput(
            "Twitter_Volume_Google_OUTPUT_ANOMALY_PLOT")
    else:
        # Fail here with a clear message instead of falling through to a
        # NameError on the unset local variables below.
        raise ValueError(
            "No specified dataset: expected 0 or 1, got %r" % (dataset,))

    # create model
    model = createModel(model_par)

    # run model
    runModel(model, csv_path, outputCSVFile, outputPlotFile)
Exemplo n.º 2
0
def runModel(model, inputFilePath):
    """
    Feed the weight column of a CSV file to the model, row by row.

    The first three rows of the file are skipped as headers.  For every
    remaining row, column 2 is passed to the model as ``weight`` together with
    a fixed ``weight_date``.  The shifted result is written to a
    ``weight_output`` NuPICFileOutput.  The input file and the output handler
    are closed when processing ends, including on error.

    :param model: OPF Model object
    :param inputFilePath: path to the input CSV
    """
    with open(inputFilePath, "rb") as inputFile:
        csvReader = csv.reader(inputFile)
        # skip header rows
        for _ in range(3):
            next(csvReader)

        output = nupic_anomaly_output.NuPICFileOutput("weight_output")
        try:
            shifter = InferenceShifter()

            # NOTE(review): the timestamp is a hard-coded constant rather than
            # a value read from the row; it is the same for every record, so
            # it is parsed once here instead of once per row.
            weight_date = datetime.datetime.strptime("2013-01-08 12:00:00",
                                                     DATE_FORMAT)

            for counter, row in enumerate(csvReader, 1):
                if counter % 20 == 0:
                    print("Read %i lines..." % counter)
                weight = row[2]
                result = model.run({
                    "weight": weight,
                    "weight_date": weight_date
                })

                result = shifter.shift(result)
                prediction = str(
                    result.inferences["multiStepBestPredictions"][1])
                # int() truncates the score toward zero.
                anomalyScore = int(result.inferences["anomalyScore"])

                output.write(weight_date, weight, prediction, anomalyScore)
        finally:
            output.close()
Exemplo n.º 3
0
def runIoThroughNupic(inputData, model, gymName, plot, load):
    """
    Handles looping over the input data and passing each row into the given
    model object, as well as extracting the result object and passing it into
    an output handler.

    The first three rows are skipped as headers.  The next row is consumed
    only to seed the timestamp and is not fed to the model; every later row
    gets a synthetic timestamp 10 ms after the previous one.  The input file
    and the output handler are closed even if processing fails.

    :param inputData: file path to input data CSV
    :param model: OPF Model object
    :param gymName: Gym name, used for output handler naming
    :param plot: Whether to use matplotlib or not. If false, uses file output.
    :param load: if false, the model is saved to MODEL_DIR after the run
    :return: the model that was passed in
    """
    with open(inputData, "rb") as inputFile:
        csvReader = csv.reader(inputFile)
        # skip header rows
        for _ in range(3):
            next(csvReader)

        shifter = InferenceShifter()
        if plot:
            output = nupic_anomaly_output.NuPICPlotOutput(gymName)
        else:
            output = nupic_anomaly_output.NuPICFileOutput(gymName)

        try:
            # using dummy time to debug: only the first data row's timestamp
            # is parsed; later rows advance it by a fixed step instead of
            # reading their own timestamp column.
            timestamp = datetime.datetime.strptime(next(csvReader)[0],
                                                   DATE_FORMAT)
            # Formatted as one string so Python 2 does not print a tuple.
            print("DEBUG_PRINT: initial time %s" % timestamp)

            for counter, row in enumerate(csvReader, 1):
                if counter % 100 == 0:
                    print("Read %i lines..." % counter)

                timestamp = timestamp + datetime.timedelta(microseconds=10000)
                consumption = float(row[2])
                rawValue = float(row[1])
                result = model.run({
                    "timestamp": timestamp,
                    "wavelet_value": consumption
                })

                if plot:
                    result = shifter.shift(result)

                prediction = result.inferences["multiStepBestPredictions"][1]
                anomalyScore = result.inferences["anomalyScore"]

                output.write(timestamp, consumption, prediction, anomalyScore,
                             rawValue)

            if not load:
                print("saving model for MODEL_DIR")
                model.save(MODEL_DIR)
        finally:
            output.close()

    return model
Exemplo n.º 4
0
def runHarddriveAnomaly(plot):
    """
    Run the 'good' hard-drive model over the test data and record anomaly
    scores.

    Each row of ``harddrive-smart-data-test.csv`` is passed to the model.  The
    result goes to the output handler, and the row's class plus its anomaly
    score are appended to the CSV at ``_OUTPUT_PATH``.  Rows whose score
    exceeds ``_ANOMALY_THRESHOLD`` are logged.

    :param plot: Whether to use the plot output handler. If false, uses file
        output.
    """
    shifter = InferenceShifter()
    if plot:
        output = nupic_anomaly_output.NuPICPlotOutput('Harddrive Learning')
    else:
        output = nupic_anomaly_output.NuPICFileOutput('Harddrive Learning')

    try:
        _LOGGER.info('start with anomaly detection...')
        model = getModel('good')
        model.enableInference({'predictedField': 'class'})

        _LOGGER.info('read data file...')

        data = pd.read_csv('harddrive-smart-data-test.csv')
        dictionary = data.transpose().to_dict().values()

        # Open the score file and write its header once, before the loop.
        # Reopening it per row (as before) truncated the file every time, so
        # only the last row survived.
        with open(_OUTPUT_PATH, "w") as scoreFile:
            csvWriter = csv.writer(scoreFile)
            csvWriter.writerow(["class", "anomaly_score"])

            for row in dictionary:
                result = model.run(row)

                if plot:
                    result = shifter.shift(result)

                prediction = result.inferences["multiStepBestPredictions"][1]
                anomalyScore = result.inferences['anomalyScore']

                output.write('', result.rawInput["class"], prediction,
                             anomalyScore)

                csvWriter.writerow([row["class"], anomalyScore])
                if anomalyScore > _ANOMALY_THRESHOLD:
                    _LOGGER.info(
                        "Anomaly detected at [%s]. Anomaly score: %f.",
                        result.rawInput["class"], anomalyScore)
    finally:
        output.close()
Exemplo n.º 5
0
def runIoThroughNupic(inputData, model, metric, sensor, patientId, plot):
  """
  Streams a CSV of metric values through the model and forwards every result
  to an output handler.

  The first three rows are treated as headers.  Column 0 of each remaining row
  is fed to the model as "metric_value"; the 1-based row number is written to
  the output in place of a timestamp.

  :param inputData: file path to input data CSV
  :param model: OPF Model object
  :param metric: first component of the output name
  :param sensor: second component of the output name
  :param patientId: third component of the output name
  :param plot: Whether to use matplotlib or not. If false, uses file output
               under MODEL_RESULTS_DIR (created when missing).
  """
  inputFile = open(inputData, "rb")
  lines = inputFile.read().splitlines()
  csvReader = csv.reader(lines)
  # the first three rows are headers
  for _ in range(3):
    next(csvReader)

  csvName = "%s_%s_%s" % (metric, sensor, patientId)
  print("running model with model_params '%s'" % csvName)

  shifter = InferenceShifter()
  if plot:
    output = nupic_anomaly_output.NuPICPlotOutput(csvName)
  else:
    if not os.path.exists(MODEL_RESULTS_DIR):
      os.makedirs(MODEL_RESULTS_DIR)
    outputName = "%s/%s" % (MODEL_RESULTS_DIR, csvName)
    output = nupic_anomaly_output.NuPICFileOutput(outputName)

  for counter, row in enumerate(csvReader, 1):
    if counter % 100 == 0:
      print("Read %i lines..." % counter)

    metric_value = float(row[0])
    result = model.run({"metric_value": metric_value})
    if plot:
      result = shifter.shift(result)

    inferences = result.inferences
    prediction = inferences["multiStepBestPredictions"][0]
    anomalyScore = inferences["anomalyScore"]
    output.write(counter, metric_value, prediction, anomalyScore)

  output.close()
  inputFile.close()
def runModel(model, inputFilePath):
    inputFile = open(inputFilePath, "rb")
    csvReader = csv.reader(inputFile)
    # skip header rows
    csvReader.next()
    csvReader.next()
    csvReader.next()

    output = nupic_anomaly_output.NuPICFileOutput("disease_forecast")

    shifter = InferenceShifter()
    counter = 0
    actualCount = 0
    predictCount = 0
    miss = 0
    hit = 0
    row_count = 0
    for row in csvReader:
        if row_count > 1000:
            break
        counter += 1
        if (counter % 10 == 0):
            print "Read %i lines..." % counter
        disease_date = datetime.datetime.strptime(row[5], DATE_FORMAT)
        disease_name = str(row[2])
        result = model.run({
            "disease_name": disease_name,
            "disease_date": disease_date
        })

        result = shifter.shift(result)
        prediction = str(result.inferences["multiStepBestPredictions"][20])
        anomalyScore = int(result.inferences["anomalyScore"])

        output.write(disease_date, disease_name, prediction, anomalyScore)
        if prediction == "Malaria":
            predictCount += 1
        if disease_name == "Malaria" and prediction != None:
            actualCount += 1
        if prediction != None:
            if disease_name == prediction:
                hit += 1
            else:
                miss += 1
        row_count += 1
        print counter, row[
            0], "Actual: ", disease_name, "Predicted: ", prediction, "------", anomalyScore
    print "\n Number of actuals: ", actualCount, " \n Number of predictions: ", predictCount
    print "\n hits: ", hit, "\n misses: ", miss - 20
Exemplo n.º 7
0
def runIoThroughNupic(inputData, model, app, plot):
    """
    Runs every data row of a CSV file through the model and plots the
    resulting anomaly score against the 1-based row number.

    The first three rows are treated as headers.  Columns 8, 9 and 10 of each
    remaining row are fed to the model as "packets", "bytes" and "duration".

    :param inputData: file path to input data CSV
    :param model: OPF Model object
    :param app: name given to the output handler
    :param plot: Whether to use matplotlib or not. If false, uses file output.
    """
    inputFile = open(inputData, "rb")
    csvReader = csv.reader(inputFile)
    # the first three rows are headers
    for _ in range(3):
        next(csvReader)

    # Instantiated as in every sibling runner, but not used below.
    shifter = InferenceShifter()
    outputClass = (nupic_anomaly_output.NuPICPlotOutput if plot
                   else nupic_anomaly_output.NuPICFileOutput)
    output = outputClass(app)

    for counter, row in enumerate(csvReader, 1):
        if counter % 100 == 0:
            print("Read %i lines..." % counter)

        packets = int(row[8])
        byteCount = int(row[9])
        duration = float(row[10])
        record = {
            "packets": packets,
            "bytes": byteCount,
            "duration": duration
        }
        result = model.run(record)

        output.plot(counter, result.inferences["anomalyScore"])

    inputFile.close()
    output.close()
Exemplo n.º 8
0
def runIoThroughNupic(inputData, model, gymName, plot):
    """
    Feeds each data row of the input CSV to the model and forwards the result
    to an output handler.

    The first three rows are treated as headers.  Column 0 is parsed with
    DATE_FORMAT and sent as "time_start"; column 1 is sent as a float under
    the FEAT_NAME key.  Results are shifted only when plotting.

    :param inputData: file path to input data CSV
    :param model: OPF Model object
    :param gymName: Gym name, used for output handler naming
    :param plot: Whether to use matplotlib or not. If false, uses file output.
    """
    inputFile = open(inputData, "rb")
    csvReader = csv.reader(inputFile)
    # the first three rows are headers
    for _ in range(3):
        next(csvReader)

    shifter = InferenceShifter()
    outputClass = (nupic_anomaly_output.NuPICPlotOutput if plot
                   else nupic_anomaly_output.NuPICFileOutput)
    output = outputClass(gymName)

    for counter, row in enumerate(csvReader, 1):
        if counter % 1000 == 0:
            print("Read %i lines..." % counter)

        timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
        feat = float(row[1])
        record = {"time_start": timestamp, FEAT_NAME: feat}
        result = model.run(record)
        if plot:
            result = shifter.shift(result)

        inferences = result.inferences
        prediction = inferences["multiStepBestPredictions"][1]
        anomalyScore = inferences["anomalyScore"]
        output.write(timestamp, feat, prediction, anomalyScore)

    inputFile.close()
    output.close()
Exemplo n.º 9
0
def runIoThroughNupic(inputData, model, InputName, plot):
  """
  Reads the input CSV row by row, runs each row through the model and passes
  the outcome to an output handler.

  The first three rows are treated as headers.  Column 0 is parsed with
  DATE_FORMAT and sent as "c0"; column 1 is sent as a float under "c1".
  Results are shifted only when plotting.

  :param inputData: file path to input data CSV
  :param model: OPF Model object
  :param InputName: name used for output handler naming
  :param plot: Whether to use matplotlib or not. If false, uses file output.
  """
  inputFile = open(inputData, "rb")
  csvReader = csv.reader(inputFile)
  # the first three rows are headers
  for _ in range(3):
    next(csvReader)

  shifter = InferenceShifter()
  if plot:
    output = nupic_anomaly_output.NuPICPlotOutput(InputName)
  else:
    output = nupic_anomaly_output.NuPICFileOutput(InputName)

  for counter, row in enumerate(csvReader, 1):
    if counter % 100 == 0:
      print("Read %i lines..." % counter)

    timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
    value = float(row[1])
    result = model.run({"c0": timestamp, "c1": value})
    if plot:
      result = shifter.shift(result)

    inferences = result.inferences
    output.write(timestamp,
                 value,
                 inferences["multiStepBestPredictions"][1],
                 inferences["anomalyScore"])

  inputFile.close()
  output.close()
Exemplo n.º 10
0
def runIoThroughNupic(inputData, model, modelName, plot):
    """
    Run each data row of the input CSV through the model, track the error
    metrics and hand every result to an output handler.

    The first three rows are skipped as headers.  Column 0 is parsed with
    DATE_FORMAT and sent as "Time"; column 1 is sent as a float under the
    PREDICTED_FIELD key.  Every 100 rows the running 1-step altMAPE metric is
    printed.  Results are shifted only when plotting.

    :param inputData: file path to input data CSV
    :param model: OPF Model object
    :param modelName: name used for output handler naming
    :param plot: Whether to use matplotlib or not. If false, uses file output.
    """
    inputFile = open(inputData, "rb")
    csvReader = csv.reader(inputFile)
    # skip header rows
    for _ in range(3):
        next(csvReader)

    shifter = InferenceShifter()
    if plot:
        output = nupic_anomaly_output.NuPICPlotOutput(modelName)
    else:
        output = nupic_anomaly_output.NuPICFileOutput(modelName)

    metricsManager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(),
                                    model.getInferenceType())
    # Key of the metric reported in the progress message; loop-invariant.
    metricKey = ("multiStepBestPredictions:multiStep:"
                 "errorMetric='altMAPE':steps=1:window=1000:"
                 "field=%s" % PREDICTED_FIELD)

    for counter, row in enumerate(csvReader, 1):
        timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
        consumption = float(row[1])
        result = model.run({"Time": timestamp, PREDICTED_FIELD: consumption})
        result.metrics = metricsManager.update(result)

        if counter % 100 == 0:
            print("Read %i lines..." % counter)
            # Apply the format string; passing the values as extra print
            # arguments printed the raw template (a tuple under Python 2).
            print("After %i records, 1-step altMAPE=%f"
                  % (counter, result.metrics[metricKey]))

        if plot:
            result = shifter.shift(result)

        prediction = result.inferences["multiStepBestPredictions"][1]
        anomalyScore = result.inferences["anomalyScore"]
        output.write(timestamp, consumption, prediction, anomalyScore)

    inputFile.close()
    output.close()
Exemplo n.º 11
0
def run_io_through_nupic(input_data, model, metric_name, plot):
    """
    Run each data row of the input CSV through the model and hand every
    result to an output handler.

    The first three rows are skipped as headers.  Column 1 is parsed with
    DATE_FORMAT and sent as "timestamp"; column 0 is sent as a float under
    "value".  Results are shifted only when plotting.

    :param input_data: file path to input data CSV
    :param model: OPF Model object
    :param metric_name: name used for output handler naming
    :param plot: Whether to use matplotlib or not. If false, uses file output.
    """
    input_file = open(input_data, "rb")
    csv_reader = csv.reader(input_file)
    # skip header rows
    for _ in range(3):
        next(csv_reader)

    # Instantiate the shifter: the bare class (no parentheses) cannot be used
    # to call shift() on a result.
    shifter = InferenceShifter()
    if plot:
        output = nupic_anomaly_output.NuPICPlotOutput(metric_name)
    else:
        output = nupic_anomaly_output.NuPICFileOutput(metric_name)

    for counter, row in enumerate(csv_reader, 1):
        if counter % 100 == 0:
            print("Read %i lines..." % counter)
        timestamp = datetime.strptime(row[1], DATE_FORMAT)
        value = float(row[0])
        result = model.run({
            "timestamp": timestamp,
            "value": value
        })

        if plot:
            result = shifter.shift(result)

        print("Line %d" % counter)

        prediction = result.inferences["multiStepBestPredictions"][1]
        anomaly_score = result.inferences["anomalyScore"]
        output.write(timestamp, value, prediction, anomaly_score)

    input_file.close()
    output.close()
Exemplo n.º 12
0
# Enable inference on the model with "value" as the predicted field.
model.enableInference({"predictedField": "value"})

# %%

# Directory holding the input CSV, and the strptime/strftime timestamp format.
DATA_DIR = "."
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Input path: INPUT_FILE with spaces replaced by underscores, plus ".csv",
# under DATA_DIR.
inputData = "%s/%s.csv" % (DATA_DIR, INPUT_FILE.replace(" ", "_"))
# NOTE(review): opened at module level; no matching close() is visible in this
# part of the file.
inputFile = open(inputData, "rb")
csvReader = csv.reader(inputFile)
# skip header rows
csvReader.next()
csvReader.next()
csvReader.next()
shifter = InferenceShifter()
# The output handler is named after the unmodified INPUT_FILE (spaces kept).
output = nupic_anomaly_output.NuPICFileOutput(INPUT_FILE)
# %%
# Row counter and accumulators, all starting empty; they are not filled
# anywhere in the visible code.
counter = 0
valueOut = []
timestampOut = []
predictionOut = []
anomalyScoreOut = []

# NOTE(review): these look like anomaly-likelihood estimation settings; they
# are not used in the visible code - confirm their meaning against the code
# that consumes them.
claLearningPeriod = None
learningPeriod = 288
estimationSamples = 100
historicWindowSize = 8640
reestimationPeriod = 100

# NOTE(review): presumably running state for the same estimation - confirm.
iteration = 0
_distribution = None
def runModel(model, inputFilePath, run_count):
    inputFile = open(inputFilePath, "rb")
    csvReader = csv.reader(inputFile)
    # skip header rows
    csvReader.next()
    csvReader.next()
    csvReader.next()

    output = nupic_anomaly_output.NuPICFileOutput("Vaccination_present")

    shifter = InferenceShifter()
    counter = 0
    actualCount = 0
    predictCount = 0
    miss = 0
    hit = 0
    vaccine_actuals = {}
    vaccine_predictions = {}

    # accuracy_file = open("vaccine_accuracy.txt", "w")
    com_file_name = "vaccination_accuracy_present.csv"
    accuracy_dir = "vaccine_accuracy_dir/"
    vaccine_accuracy = csv.writer(open(accuracy_dir + com_file_name, "a"),
                                  delimiter=",")
    vaccine_accuracy.writerow(['Run number ' + str(run_count)])

    for row in csvReader:
        counter += 1
        if (counter % 20 == 0):
            # start miss and hit counts
            miss = 0
            hit = 0
            file_row = []
            ## writing to file
            # file_line = "==============================\n"
            # for key, value in vaccine_actuals.iteritems():
            # file_line += key+" : "+str(value)+"   "+str(vaccine_predictions.get(key, 0))+"\n"
            for key in vaccine_actuals:
                if vaccine_actuals[key] != None:
                    file_row.append(str(vaccine_actuals[key]))
                else:
                    file_row.append(str(0))
                if vaccine_predictions.get(key) != None:
                    file_row.append(str(vaccine_predictions.get(key)))
                else:
                    file_row.append(str(0))
                # vaccine_accuracy.writerow([key, str(value), str(vaccine_predictions.get(key, 0))])
            vaccine_accuracy.writerow(file_row)
            # write line to file
            #accuracy_file.write(file_line)
            file_row = []
            for key in vaccine_actuals:
                vaccine_actuals[key] = 0
            for key in vaccine_predictions:
                vaccine_predictions[key] = 0
            print "Read %i lines..." % counter
        vaccine_date = datetime.datetime.strptime(row[2], DATE_FORMAT)
        vaccine_name = str(row[1])
        result = model.run({
            "vaccine_date": vaccine_date,
            "vaccine_name": vaccine_name
        })

        result = shifter.shift(result)
        prediction = str(result.inferences["multiStepBestPredictions"][20])
        anomalyScore = int(result.inferences["anomalyScore"])
        # abnormal = anomaly_likelihood.AnomalyLikelihood()
        """anomalyLikelihood = abnormal.anomalyProbability(
            vaccine_name, anomalyScore, vaccine_date
        )"""

        output.write(vaccine_date, vaccine_name, prediction, anomalyScore)
        # if vaccine not in actuals
        if vaccine_actuals.get(vaccine_name) == None:
            # print "got you now", vaccine_name
            vaccine_actuals.update({vaccine_name: 1})
            # update value if actual already exitsts
        else:
            vaccine_actuals[vaccine_name] += 1
        # if vaccine not in predictions
        if vaccine_predictions.get(prediction) == None:
            vaccine_predictions.update({prediction: 1})
            # update value if prediction alreadye exists
        else:
            vaccine_predictions[prediction] += 1
        if prediction == vaccine_name:
            hit += 1
        else:
            miss += 1
        """print counter, "community member_id: ", row[0], "Actual: ", vaccine_name, "Predicted: ", prediction, "------", anomalyScore, "====>> ", anomalyLikelihood"""
        print counter, "Actual: ", vaccine_name, "Predicted: ", prediction, " =====        ", anomalyScore
        if anomalyScore == 1:
            print "**************************"
            print "**************************"
            print "**************************"
            print "****                  ****"
            print "****                  ****"
            print "****                  ****"
            print "**** ", vaccine_name, " ****"
            print "****                  ****"
            print "****                  ****"
            print "****                  ****"
            print "**************************"
            print "**************************"
            print "**************************"

    # close accuracy file
    #accuracy_file.close()
    print "\n Number of actuals: ", actualCount, " \n Number of predictions: ", predictCount
    print "\n hits: ", hit, "\n misses: ", miss
    print "List of actuals"
    print vaccine_actuals
    print "List of predictions"
    print vaccine_predictions
Exemplo n.º 14
0
def runModel(model, inputFilePath, model_count):
    inputFile = open(inputFilePath, "rb")
    csvReader = csv.reader(inputFile)
    # skip header rows
    csvReader.next()
    csvReader.next()
    csvReader.next()

    output = nupic_anomaly_output.NuPICFileOutput("disease_forecast")

    # new , store prediction vs actual
    results_file = csv.writer(open("forecast_accuracy_com4_f"+str(model_count)+".csv", "w"), delimiter=",") 
    results_file.writerow(['ARI_actual', 'ARI_predicted', 'Malaria_actual', 'Malaria_predicted'])


    shifter = InferenceShifter()
    counter = 0
    actualCount = 0
    predictCount = 0
    predictCountA = 0
    actualCountA = 0
    miss = 0
    hit = 0
    for row in csvReader:
        counter += 1
        if(counter % 20 == 0):
            print "Read %i lines..." % counter
            print"\n***Malaria***\n Number of actuals: ", actualCount," \n Number of predictions: ", predictCount
            print "\n***ARI***","\nNumber of actuals:", actualCountA, "\nNumber of predictions: ", predictCountA
            # print stats to file
            results_file.writerow([actualCountA, predictCountA, actualCount, predictCount])

            actualCount = 0
            actualCountA = 0
            predictCount = 0
            predictCountA = 0
        disease_date = datetime.datetime.strptime(row[5], DATE_FORMAT)
        disease_name = str(row[2])
        result = model.run({
            "disease_name": disease_name,
            "disease_date": disease_date
        })

        result = shifter.shift(result)
        prediction = str(result.inferences["multiStepBestPredictions"][20])
        anomalyScore = int(result.inferences["anomalyScore"])

        output.write(disease_date, disease_name, prediction, anomalyScore)
        if prediction == "Malaria":
            predictCount += 1
        elif prediction == "ARI":
            predictCountA += 1
        if disease_name == "Malaria" and prediction != None:
            actualCount += 1
        elif disease_name == "ARI" and prediction != None:
            actualCountA += 1
        if prediction != None:
            if disease_name == prediction:
                hit += 1
            else:
                miss += 1
        print counter, row[0], "Actual: ", disease_name, "Predicted: ", prediction, "------", anomalyScore
    print"\n***Malaria***\n Number of actuals: ", actualCount," \n Number of predictions: ", predictCount
    print "\n***ARI***","\nNumber of actuals:", actualCountA, "\nNumber of predictions: ", predictCountA,"\nhits: ", hit,"\n misses: ", miss-20