Example #1
0
def runDataset(dataset):
    """
    Runs anomaly detection over the dataset selected by index.

    :param dataset: 0 for the machine-temperature-system-failure dataset,
        1 for the Twitter GOOG volume dataset.
    :raises ValueError: if ``dataset`` is not a known index.
    """
    # set model parameters, csv path, and output csv/plot
    if dataset == 0:
        model_par = machine_model_params
        csv_path = "./data/machine_temperature_system_failure.csv"
        nupic_output.WINDOW = 22694
        nupic_output.ANOMALY_THRESHOLD = 0.97
        outputCSVFile = nupic_output.NuPICFileOutput(
            "Machine_Temp_Sys_Failure_OUTPUT_ANOMALY_CSV")
        outputPlotFile = nupic_output.NuPICPlotOutput(
            "Machine_Temp_Sys_Failure_OUTPUT_ANOMALY_PLOT")
    elif dataset == 1:
        model_par = twitter_model_params
        csv_path = "./data/Twitter_volume_GOOG.csv"
        nupic_output.WINDOW = 15841
        outputCSVFile = nupic_output.NuPICFileOutput(
            "Twitter_Volume_Google_OUTPUT_ANOMALY_CSV")
        outputPlotFile = nupic_output.NuPICPlotOutput(
            "Twitter_Volume_Google_OUTPUT_ANOMALY_PLOT")
    else:
        # The original assigned a misspelled variable (model_params) here and
        # fell through to createModel(), crashing with a NameError on
        # model_par / csv_path.  Fail fast with a clear error instead.
        raise ValueError("No specified dataset: %r" % (dataset,))

    # create model
    model = createModel(model_par)

    # run model
    runModel(model, csv_path, outputCSVFile, outputPlotFile)
Example #2
0
def runIoThroughNupic(inputData, model, gymName, plot, load):
    """
    Handles looping over the input data and passing each row into the given model
    object, as well as extracting the result object and passing it into an output
    handler.

    :param inputData: file path to input data CSV
    :param model: OPF Model object
    :param gymName: Gym name, used for output handler naming
    :param plot: Whether to use matplotlib or not. If false, uses file output.
    :param load: if True the model was loaded from disk, so it is not re-saved
        to MODEL_DIR at the end.
    :returns: the model object after processing all rows
    """
    inputFile = open(inputData, "rb")
    csvReader = csv.reader(inputFile)
    # skip the three NuPIC header rows (field names, types, flags)
    csvReader.next()
    csvReader.next()
    csvReader.next()

    shifter = InferenceShifter()
    if plot:
        output = nupic_anomaly_output.NuPICPlotOutput(gymName)
    else:
        output = nupic_anomaly_output.NuPICFileOutput(gymName)

    counter = 0

    # Seed a synthetic clock from the first data row's timestamp.  Note this
    # consumes that row, so its value is never fed to the model (kept from the
    # original debugging setup).
    timestamp = datetime.datetime.strptime(csvReader.next()[0], DATE_FORMAT)
    print("DEBUG_PRINT: initial time", timestamp)

    for row in csvReader:
        counter += 1

        if counter % 100 == 0:
            print("Read %i lines..." % counter)

        # advance the synthetic clock 10 ms per row instead of trusting row[0]
        timestamp = timestamp + datetime.timedelta(microseconds=10000)
        consumption = float(row[2])
        rawValue = float(row[1])
        result = model.run({
            "timestamp": timestamp,
            "wavelet_value": consumption
        })

        if plot:
            # align predictions with the inputs they were made for
            result = shifter.shift(result)

        prediction = result.inferences["multiStepBestPredictions"][1]
        anomalyScore = result.inferences["anomalyScore"]

        output.write(timestamp, consumption, prediction, anomalyScore,
                     rawValue)

    if not load:
        print("saving model to MODEL_DIR")
        model.save(MODEL_DIR)
    inputFile.close()
    output.close()

    return model
Example #3
0
def runModel(model):
    """
    Feed the ECG recording in disease_person1.csv through *model* row by row,
    plotting each prediction and anomaly score.

    :param model: OPF Model object expecting "timestamp" and "value" fields
    """
    dataPath = "disease_person1.csv"
    dataFile = open(dataPath, "rb")
    reader = csv.reader(dataFile)
    # discard the three NuPIC header rows
    for _ in range(3):
        reader.next()

    shifter = InferenceShifter()
    plotter = nupic_output.NuPICPlotOutput("ECG")

    rowCount = 0
    for record in reader:
        rowCount += 1
        if rowCount % 100 == 0:
            print("Read %i lines..." % rowCount)

        when = datetime.datetime.strptime(record[0], DATE_FORMAT)
        reading = int(record[1])

        inference = model.run({"timestamp": when, "value": reading})
        # align each prediction with the row it was made for
        inference = shifter.shift(inference)

        predicted = inference.inferences["multiStepBestPredictions"][1]
        score = inference.inferences["anomalyScore"]
        plotter.write(when, reading, predicted, score)

    dataFile.close()
    plotter.close()
Example #4
0
def runHarddriveAnomaly(plot):
    """
    Run the pre-trained 'good' harddrive model over the SMART test data,
    writing class/anomaly-score pairs to _OUTPUT_PATH and logging any row
    whose anomaly score exceeds _ANOMALY_THRESHOLD.

    :param plot: Whether to use matplotlib or not. If false, uses file output.
    """
    shifter = InferenceShifter()
    if plot:
        output = nupic_anomaly_output.NuPICPlotOutput('Harddrive Learning')
    else:
        output = nupic_anomaly_output.NuPICFileOutput('Harddrive Learning')

    _LOGGER.info('start with anomaly detection...')
    model = getModel('good')
    model.enableInference({'predictedField': 'class'})

    _LOGGER.info('read data file...')

    data = pd.read_csv('harddrive-smart-data-test.csv')
    dictionary = data.transpose().to_dict().values()

    # Open the results file once, up front.  The original re-opened it on
    # every iteration with the invalid mode "wa", truncating the file and
    # rewriting the header row for each record, and never closed it.
    with open(_OUTPUT_PATH, "w") as outFile:
        csvWriter = csv.writer(outFile)
        csvWriter.writerow(["class", "anomaly_score"])

        for row in dictionary:
            result = model.run(row)

            if plot:
                result = shifter.shift(result)

            prediction = result.inferences["multiStepBestPredictions"][1]
            anomalyScore = result.inferences['anomalyScore']

            output.write('', result.rawInput["class"], prediction, anomalyScore)

            csvWriter.writerow([row["class"], anomalyScore])
            if anomalyScore > _ANOMALY_THRESHOLD:
                _LOGGER.info("Anomaly detected at [%s]. Anomaly score: %f.",
                             result.rawInput["class"], anomalyScore)
Example #5
0
def runIoThroughNupic(inputData, model, metric, sensor, patientId, plot):
  """
  Handles looping over the input data and passing each row into the given model
  object, as well as extracting the result object and passing it into an output
  handler.

  :param inputData: file path to input data CSV
  :param model: OPF Model object
  :param metric: metric name, part of the output handler name
  :param sensor: sensor name, part of the output handler name
  :param patientId: patient id, part of the output handler name
  :param plot: Whether to use matplotlib or not. If false, uses file output.
  """
  inputFile = open(inputData, "rb")
  reader = csv.reader(inputFile.read().splitlines())
  # drop the three NuPIC header rows
  for _ in range(3):
    reader.next()

  csvName = "%s_%s_%s" % (metric, sensor, patientId)
  print("running model with model_params '%s'" % csvName)

  shifter = InferenceShifter()
  if plot:
    output = nupic_anomaly_output.NuPICPlotOutput(csvName)
  else:
    # file output lives under MODEL_RESULTS_DIR; create it on first use
    if not os.path.exists(MODEL_RESULTS_DIR):
      os.makedirs(MODEL_RESULTS_DIR)
    output = nupic_anomaly_output.NuPICFileOutput(
        "%s/%s" % (MODEL_RESULTS_DIR, csvName))

  lineNo = 0
  for row in reader:
    lineNo += 1
    if lineNo % 100 == 0:
      print("Read %i lines..." % lineNo)

    value = float(row[0])
    result = model.run({"metric_value": value})

    if plot:
      result = shifter.shift(result)

    prediction = result.inferences["multiStepBestPredictions"][0]
    anomalyScore = result.inferences["anomalyScore"]
    output.write(lineNo, value, prediction, anomalyScore)

  output.close()
  inputFile.close()
Example #6
0
def runIoThroughNupic(inputData, model, app, plot):
    """
    Handles looping over the input data and passing each row into the given model
    object, as well as extracting the result object and passing it into an output
    handler.

    :param inputData: file path to input data CSV
    :param model: OPF Model object expecting "packets", "bytes" and "duration"
    :param app: application name, used for output handler naming
    :param plot: Whether to use matplotlib or not. If false, uses file output.
    """
    inputFile = open(inputData, "rb")
    csvReader = csv.reader(inputFile)
    # skip the three NuPIC header rows
    csvReader.next()
    csvReader.next()
    csvReader.next()

    # NOTE: the original built an InferenceShifter here but never called it;
    # the unused instance has been removed.
    if plot:
        output = nupic_anomaly_output.NuPICPlotOutput(app)
    else:
        output = nupic_anomaly_output.NuPICFileOutput(app)

    counter = 0
    for row in csvReader:
        counter += 1
        if counter % 100 == 0:
            print("Read %i lines..." % counter)
        packets = int(row[8])
        byteCount = int(row[9])  # renamed from 'bytes' to avoid shadowing the builtin
        duration = float(row[10])
        result = model.run({
            "packets": packets,
            "bytes": byteCount,
            "duration": duration
        })

        # only the anomaly score is plotted; predictions are not used here
        anomalyScore = result.inferences["anomalyScore"]
        output.plot(counter, anomalyScore)

    inputFile.close()
    output.close()
Example #7
0
def runIoThroughNupic(inputData, model, gymName, plot):
    """
    Stream every row of the input CSV through *model* and forward each
    prediction and anomaly score to an output handler.

    :param inputData: file path to input data CSV
    :param model: OPF Model object
    :param gymName: Gym name, used for output handler naming
    :param plot: Whether to use matplotlib or not. If false, uses file output.
    """
    dataFile = open(inputData, "rb")
    rows = csv.reader(dataFile)
    # the first three rows are NuPIC header metadata
    for _ in range(3):
        rows.next()

    shifter = InferenceShifter()
    if plot:
        handler = nupic_anomaly_output.NuPICPlotOutput(gymName)
    else:
        handler = nupic_anomaly_output.NuPICFileOutput(gymName)

    lineCount = 0
    for row in rows:
        lineCount += 1
        if lineCount % 1000 == 0:
            print("Read %i lines..." % lineCount)
        when = datetime.datetime.strptime(row[0], DATE_FORMAT)
        featValue = float(row[1])
        result = model.run({"time_start": when, FEAT_NAME: featValue})
        if plot:
            # shift aligns each prediction with the row it predicts
            result = shifter.shift(result)

        prediction = result.inferences["multiStepBestPredictions"][1]
        anomalyScore = result.inferences["anomalyScore"]
        handler.write(when, featValue, prediction, anomalyScore)

    dataFile.close()
    handler.close()
Example #8
0
def runModel(model, inputFilePath):
    inputFile = open(inputFilePath, "rb")
    csvReader = csv.reader(inputFile)
    # skip header rows
    csvReader.next()
    csvReader.next()
    csvReader.next()

    output = nupic_output.NuPICPlotOutput("Vaccination")

    shifter = InferenceShifter()
    counter = 0
    actualCount = 0
    predictCount = 0
    miss = 0
    hit = 0

    for row in csvReader:
        counter += 1
        if(counter % 10 == 0):
            print "Read %i lines..." % counter
        vaccine_date = datetime.datetime.strptime(row[2], DATE_FORMAT)
        vaccine_name = str(row[1])
        result = model.run({
            "vaccine_date": vaccine_date,
            "vaccine_name": vaccine_name
        })

        result = shifter.shift(result)
        prediction = result.inferences["multiStepBestPredictions"][1]

        anomalyScore = result.inferences["anomalyScore"]
        #output.write([vaccine_date], [vaccine_name], [prediction])
        print len(vaccine_name)
        output.write(vaccine_date, len(vaccine_name), len(prediction), anomalyScore)
        if prediction == "Yellow Fever":
            predictCount += 1
        if vaccine_name == "Yellow Fever":
            actualCount += 1
        if vaccine_name == prediction:
            hit += 1
        else:
            miss += 1
        print counter, "community member_id: ", row[0], "Actual: ", vaccine_name, "Predicted: ", prediction, "------", anomalyScore
    print"\n Number of actuals: ", actualCount," \n Number of predictions: ", predictCount
    print "\n hits: ", hit,"\n misses: ", miss
Example #9
0
def runIoThroughNupic(inputData, model, InputName, plot):
  """
  Pushes every data row from the CSV into the model and hands each result
  (prediction + anomaly score) to an output handler.

  :param inputData: file path to input data CSV
  :param model: OPF Model object expecting fields "c0" (timestamp) and "c1"
  :param InputName: Gym name, used for output handler naming
  :param plot: Whether to use matplotlib or not. If false, uses file output.
  """
  dataFile = open(inputData, "rb")
  reader = csv.reader(dataFile)
  # first three rows are NuPIC header metadata
  for _ in range(3):
    reader.next()

  shifter = InferenceShifter()
  output = (nupic_anomaly_output.NuPICPlotOutput(InputName)
            if plot else
            nupic_anomaly_output.NuPICFileOutput(InputName))

  rowNum = 0
  for record in reader:
    rowNum += 1
    if rowNum % 100 == 0:
      print("Read %i lines..." % rowNum)
    stamp = datetime.datetime.strptime(record[0], DATE_FORMAT)
    fieldValue = float(record[1])
    result = model.run({"c0": stamp, "c1": fieldValue})

    if plot:
      result = shifter.shift(result)

    prediction = result.inferences["multiStepBestPredictions"][1]
    anomalyScore = result.inferences["anomalyScore"]
    output.write(stamp, fieldValue, prediction, anomalyScore)

  dataFile.close()
  output.close()
Example #10
0
def runIoThroughNupic(inputData, model, modelName, plot):
    """
    Runs the input CSV through the model while tracking an altMAPE error
    metric, writing each prediction and anomaly score to an output handler.

    :param inputData: file path to input data CSV
    :param model: OPF Model object
    :param modelName: name used for the output handler
    :param plot: Whether to use matplotlib or not. If false, uses file output.
    """
    inputFile = open(inputData, "rb")
    csvReader = csv.reader(inputFile)
    # skip header rows (keeping the field names for reference)
    headers = csvReader.next()
    csvReader.next()
    csvReader.next()

    shifter = InferenceShifter()
    if plot:
        output = nupic_anomaly_output.NuPICPlotOutput(modelName)
    else:
        output = nupic_anomaly_output.NuPICFileOutput(modelName)

    metricsManager = MetricsManager(_METRIC_SPECS, model.getFieldInfo(),
                                    model.getInferenceType())

    counter = 0
    for row in csvReader:
        counter += 1
        timestamp = datetime.datetime.strptime(row[0], DATE_FORMAT)
        consumption = float(row[1])
        result = model.run({"Time": timestamp, PREDICTED_FIELD: consumption})
        result.metrics = metricsManager.update(result)

        if counter % 100 == 0:
            print("Read %i lines..." % counter)
            # BUG FIX: the original passed the format string and its values as
            # separate print arguments, so a raw tuple was printed instead of
            # the formatted message; interpolate with % instead.
            print("After %i records, 1-step altMAPE=%f" % (
                counter,
                result.metrics["multiStepBestPredictions:multiStep:"
                               "errorMetric='altMAPE':steps=1:window=1000:"
                               "field=%s" % PREDICTED_FIELD]))

        if plot:
            result = shifter.shift(result)

        prediction = result.inferences["multiStepBestPredictions"][1]
        anomalyScore = result.inferences["anomalyScore"]
        output.write(timestamp, consumption, prediction, anomalyScore)

    inputFile.close()
    output.close()
Example #11
0
def run_io_through_nupic(input_data, model, metric_name, plot):
    """
    Runs each CSV row through the model and writes the prediction and anomaly
    score to an output handler.

    :param input_data: file path to input data CSV
    :param model: OPF Model object expecting "timestamp" and "value" fields
    :param metric_name: name used for the output handler
    :param plot: Whether to use matplotlib or not. If false, uses file output.
    """
    input_file = open(input_data, "rb")
    csv_reader = csv.reader(input_file)
    # skip the three NuPIC header rows
    csv_reader.next()
    csv_reader.next()
    csv_reader.next()

    # BUG FIX: the original assigned the class itself (shifter =
    # InferenceShifter, no parentheses), so shifter.shift(result) would fail
    # whenever plot was True.  Instantiate it.
    shifter = InferenceShifter()
    if plot:
        output = nupic_anomaly_output.NuPICPlotOutput(metric_name)
    else:
        output = nupic_anomaly_output.NuPICFileOutput(metric_name)

    counter = 0
    for row in csv_reader:
        counter += 1
        if counter % 100 == 0:
            print("Read %i lines..." % counter)
        timestamp = datetime.strptime(row[1], DATE_FORMAT)
        value = float(row[0])
        result = model.run({
            "timestamp": timestamp,
            "value": value
        })

        if plot:
            result = shifter.shift(result)

        print("Line %d" % counter)

        prediction = result.inferences["multiStepBestPredictions"][1]
        anomaly_score = result.inferences["anomalyScore"]
        output.write(timestamp, value, prediction, anomaly_score)

    input_file.close()
    output.close()
Example #12
0
def process_upstream_model(model, sensors):
    """
    Replay the downstream traffic readings in readings.csv through *model*,
    plotting each prediction and anomaly score.

    :param model: OPF model run with "timestamp" and "downstream" fields
    :param sensors: dict with 'upstream' and 'downstream' entries; in the
        live code only sensors['upstream']['id'] and
        sensors['downstream']['id'] are used (for the plot title), e.g.
        {'upstream': {'id': ..., 'query': ...},
         'downstream': {'id': ..., 'sensors': [...]}} — the richer structure
        is consumed only by the commented-out query-parsing code below.
    :return: None
    """
    # NOTE(review): 're' is only referenced by the commented-out code below.
    import re
    # from collections import defaultdict
    # # upstream_sensors = set(re.findall(r"(-?\d+\(.*?\))", sensors['upstream']['query']))
    # queries = []
    # for i in upstream_sensors:
    #     if i[0] == '-':
    #         if i[1:] not in upstream_sensors:
    #             queries.append(i)
    #     else:
    #         if '-' + i not in upstream_sensors:
    #             queries.append(i)
    # # make a dict of intersections and the sensors we need to read from them
    # upstream_intersections = defaultdict(defaultdict)
    # for query in queries:
    #     split = query.split('(')
    #     intersection, isensors = split[0], split[1][:-1].split('+')
    #     upstream_intersections[intersection]['sensors'] = map(lambda x: str(int(x)*8), isensors)
    #     upstream_intersections[intersection]['subtract'] = query[0] == '-'

    # sensors_to_fetch = [sensors['downstream']['id']] + upstream_intersections.keys()
    # readings = readings_collection.find({'site_no': sensors['downstream']['id']},
    #                                     {'anomalies': False, 'predictions': False}, no_cursor_timeout=True). \
    #     sort('datetime', pymongo.ASCENDING)


    import nupic_anomaly_output

    output = nupic_anomaly_output.NuPICPlotOutput("Traffic Volume from " + sensors['upstream']['id'] + " to " + sensors['downstream']['id'])
    # print "Upstream:", sensors['upstream']
    # print "Downstream:", sensors['downstream']

    # input()


    # with open('readings.csv', 'w') as out:
    #   import csv
    #   writer = csv.DictWriter(out, fieldnames=['timestamp', 'downstream'])
    #   writer.writeheader()
    with open('readings.csv', 'r') as infile:
        import csv
        readings = csv.DictReader(infile)
        # DictReader already consumed the header line; these two calls discard
        # the first two data rows (presumably extra header/metadata rows in
        # this file — TODO confirm against readings.csv).
        readings.next()
        readings.next()

        for reading in readings:
            # current_readings = {i['site_no']: i for i in [next(readings) for _ in range(len(sensors_to_fetch))]+[x]}

            # times = pluck(current_readings.values(), 'datetime')
            # if not times.count(times[0]) == len(times):
            #     print "Datetime mismatch"
            #     continue

            # downstream_reading = current_readings[sensors['downstream']['id']]
            timestamp = reading['timestamp']

            # parse the CSV timestamp string into a datetime for the model
            timestamp = datetime.strptime(timestamp, '%Y-%m-%d %H:%M')
            # print timestamp
            # upstream_total = 0
            #
            # for intersection_id, v in upstream_intersections.items():
            #     if intersection_id != downstream_reading['site_no']:
            #         total = sum([current_readings[intersection_id]['readings'][sensor] for sensor in v['sensors']])
            #         if v['subtract']:
            #             upstream_total -= total
            #         else:
            #             upstream_total += total
            # downstream_total = sum((reading['readings'][s] for s in sensors['downstream']['sensors']))
            downstream_total = float(reading['downstream'])

            fields = {
                "timestamp": timestamp,
                'downstream': downstream_total,
                # 'upstream': upstream_total
            }
            # writer.writerow(fields)
            result = model.run(fields)
            # print result

            anomaly_score = result.inferences["anomalyScore"]
            prediction = result.inferences["multiStepBestPredictions"][1]

            # likelihood = anomaly_likelihood_helper.anomalyProbability(downstream_total, anomaly_score, timestamp)
            # print "input", downstream_total, "Pred", prediction, "anomaly_score", anomaly_score
            # NOTE(review): no output.close() is visible in this view —
            # confirm the plot is finalized elsewhere.
            output.write(timestamp, downstream_total, prediction, anomaly_score)