            logging.error("ANOMALY FOUND!")
            if __debug__:
                print("ERROR: ANOMALY")

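        # Carry Sn (presumably the EWMA severity statistic) into the next iteration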
        Sn_1 = Sn

        if anomaly_found:
            anomaly.append(1)
        else:
            anomaly.append(0)
        
        y_target.append(target)
        y_predict.append(prediction)
        y_time.append(cur_time)

        # Achieve a scrolling effect by writing only the most recent data
        if len(y_time) >= matrix_length:
            y_time = y_time[-matrix_length:]
            y_target = y_target[-matrix_length:]
            y_predict = y_predict[-matrix_length:]
            anomaly = anomaly[-matrix_length:]

        writeResults(RESULTS_FILE, (y_time, y_target, y_predict, anomaly))

        print("Target:", target, end=' ')
        print("Prediction:", prediction)
        if actual_prediction < 0:
            print("Actual prediction:", actual_prediction)

    row_count += 1
Example No. 2
    def startAnalysis(self):

        # Use filename from attackEdit instead of inputEdit if possible
        if len(self.attackList) > 0: 
            infile = str(self.attackEdit.text())
        else:
            infile = str(self.inputEdit.text())
        outfile = str(self.resultsEdit.text())
        granularity = 1
        trainingWin = 24
        forecastingInterval = 1

        print("\nStarting analysis on %s with settings %d %d %d..."
              % (infile, granularity, trainingWin, forecastingInterval))

        # Get list of features (the first column is the timestamp)
        infile = open(infile, 'r', newline='')
        reader = csv.reader(infile)
        columns = next(reader)[1:]
        
        print("The following features were found:", columns)
                
        # Algorithm settings
        algo = Algo(granularity, trainingWin, forecastingInterval, len(columns)-1)
        algo.setEMAParameter(alpha=self.emaSpin.value())
        algo.setSeverityParameters(w=self.severitySpinW.value(),
                                   L=self.severitySpinL.value())
        
        y_time = ['Timestamp']
        y_target = ['Target']
        y_predict = ['Prediction']
        anomalies = ['Anomaly']

        detected = set()
        ground_truth = set()
        
        first = True
        
        print("Beginning analysis...")
        loadingWin = LoadingWindow()
        self.mainWidget.setEnabled(False)
        count = 0
        for line in reader:

            # Read new data from file
            cur_time = float(line[0])
            new_data = np.asarray(line[1:], float)
            target, prediction = algo.run(new_data) # Magic!
            
            if prediction is not None:
                y_time.append(cur_time)
                y_target.append(target)
                y_predict.append(float(prediction))
                
                if algo.checkSeverity(target, float(prediction)):
                    detected.add(cur_time)
                    anomalies.append(1)
                else:
                    anomalies.append(0)

            cur_datetime = dt.datetime.fromtimestamp(cur_time)
            for attack in self.attackList:
                if attack.start <= cur_datetime < attack.end:
                    ground_truth.add(cur_time)
                    break
                    
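            # Periodically yield to the Qt event loop so the GUI stays responsive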
            if count % 60 == 0:
                # print("Trying time:", cur_time)
                QtGui.QApplication.processEvents()
            count += 1

        # Close the input file and save results
        infile.close()
        writeResults(outfile, (y_time, y_target, y_predict, anomalies))
        f1_scores(detected, ground_truth)
        print_stats(y_target[1:], y_predict[1:]) # Skip the header row
        print("Ending analysis. See %s for results." % outfile)

        self.mainWidget.setEnabled(True)
        loadingWin.close()
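
For reference, a minimal sketch of what the project-local f1_scores and
print_stats helpers might look like, assuming f1_scores compares the set of
detected timestamps against the ground-truth set and print_stats reports basic
prediction-error statistics (both are assumptions; the actual implementations
are not shown in these excerpts):

import numpy as np

def f1_scores(detected, ground_truth):
    # Treat each timestamp as one detection decision
    true_pos = len(detected & ground_truth)
    false_pos = len(detected - ground_truth)
    false_neg = len(ground_truth - detected)
    precision = true_pos / (true_pos + false_pos) if (true_pos + false_pos) else 0.0
    recall = true_pos / (true_pos + false_neg) if (true_pos + false_neg) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    print("Precision: %.3f  Recall: %.3f  F1: %.3f" % (precision, recall, f1))

def print_stats(targets, predictions):
    # Report simple error statistics between observed and predicted values
    errors = np.asarray(targets, float) - np.asarray(predictions, float)
    print("MAE:  %.3f" % np.mean(np.abs(errors)))
    print("RMSE: %.3f" % np.sqrt(np.mean(errors ** 2)))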
Example No. 3

import datetime as dt
import json

import numpy as np

# Algo, Database, writeResults, f1_scores, print_stats, getListIDs,
# getStartEndTimes, SMART_DRIVER, DB_CONFIG and DATE_FORMAT are assumed to
# come from elsewhere in this module
def main(argv):

    # Retrieve settings from the JSON settings file
    with open(SMART_DRIVER) as driver:
        jsonDataFile = json.load(driver)

    granularity = int(jsonDataFile['granularity'])
    training_window = int(jsonDataFile['windowSize'])
    forecasting_interval = int(jsonDataFile['forecastingInterval'])
    
    print("\nStarting analysis on database with settings %d %d %d..."
          % (granularity, training_window, forecasting_interval))

    granularity_in_seconds = granularity * 60

    # Initialize database
    database = Database(DB_CONFIG)
           
    # Get the list of feature numbers
    id_list = getListIDs(jsonDataFile["idSelection"])

    id_list = list(set(id_list)) # Remove duplicates
    id_list.sort()

    # Determine the range of times to pull data from
    # If the user specified a timeframe, use that
    if int(jsonDataFile["specifyTime"]):
        start_time = dt.datetime.strptime(jsonDataFile["beginTime"], DATE_FORMAT)
        end_time = dt.datetime.strptime(jsonDataFile["endTime"], DATE_FORMAT)

    # Otherwise, find the largest timeframe for which each feature has data
    else:
        start_time, end_time = getStartEndTimes(id_list)

    print("Start, end:", start_time, end_time)
        
    # Get the list of column headers for the features
    columns = []
    for feature_id in id_list:
        columns.append(jsonDataFile['data'][feature_id - 1]['columnName'])

    columns.append(jsonDataFile['totalConsum'])
   
    #print("The following features were found:", columns)

    # Algorithm settings
    algo = Algo(granularity, training_window, forecasting_interval, len(columns)-1)
    
    # Output lists
    y_time = ['Timestamp']
    y_target = ['Target']
    y_predict = ['Prediction']
    anomalies = ['Anomaly']
    
    count = 0
    
    # EWMA additions
    # alpha is adjustable on a scale of (0, 1]
    # The smaller the value of alpha, the more averaging takes place;
    # a value of 1.0 means no averaging happens
    #alpha = float(input('Enter value of alpha: '))
    algo.setEMAParameter(alpha=1.0)
    #algo.setEMAParameter(alpha=0.7)
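    # For reference, the standard EWMA update that alpha presumably controls
    # inside Algo (assumed form; Algo's internals are not shown in this excerpt):
    #   S_n = alpha * x_n + (1 - alpha) * S_{n-1}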
    
    # Recommended severity parameters from the paper
    #algo.setSeverityParameters(w=0.53, L=3.714) # Most sensitive
    #algo.setSeverityParameters(w=0.84, L=3.719) # Medium sensitivity
    #algo.setSeverityParameters(w=1.00, L=3.719) # Least sensitive

    algo.setSeverityParameters(w=1, L=3.719) # Custom sensitivity
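    # w and L are presumably the EWMA weight and the control-limit width of the
    # severity test; a standard EWMA control chart flags an anomaly when
    #   |S_n| > L * sigma * sqrt(w / (2 - w))
    # (assumed form; Algo.checkSeverity's internals are not shown in this excerpt)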

    detected = set()
    ground_truth = set()
    
    #==================== ANALYSIS ====================#
    print("Beginning analysis...")
    while start_time < end_time:

        # FOR SMART* ONLY
        # Some of the data seems bad on the 31st - too many NULLs
        if (start_time > dt.datetime(2012, 5, 30) and 
            start_time < dt.datetime(2012, 6, 1)):
            
            start_time = dt.datetime(2012, 6, 1)

        if count % 240 == 0:
            print("Trying time: %s" % start_time)
            
        count += 1

        # Execute the query
        stop_time = start_time + dt.timedelta(0, granularity_in_seconds)
        new_data = database.get_avg_data(start_time, stop_time, columns)
        new_data = np.asarray([max(0, data) for data in new_data]) # clamp NaN/negatives to 0
        target, prediction = algo.run(new_data) # Magic!
        
        if prediction is not None:
            y_time.append(start_time)
            y_target.append(target)
            y_predict.append(float(prediction))
            
            if algo.checkSeverity(target, float(prediction)):
                detected.add(start_time)
                anomalies.append(1)
            else:
                anomalies.append(0)

        start_time = stop_time #Increment and loop


    #==================== RESULTS ====================#
    # Save data for later graphing
    outfile = 'results.csv' # assumed output path; none is defined in this snippet
    results = y_time, y_target, y_predict, anomalies
    writeResults(outfile, results)
    f1_scores(detected, ground_truth) # NOTE: ground_truth is never populated here
    print_stats(y_target[1:], y_predict[1:]) # Skip the header row
    print("Ending analysis. See %s for results." % outfile)
    
    return results
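
A minimal sketch of the project-local writeResults helper, assuming it writes
the four parallel result lists (each beginning with its header string) out as
CSV columns (an assumption; the actual implementation is not shown here):

import csv

def writeResults(outfile, results):
    # results is (y_time, y_target, y_predict, anomalies); each list starts
    # with its header entry, so zip() emits the header row first
    with open(outfile, 'w', newline='') as out:
        csv.writer(out).writerows(zip(*results))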
Example No. 5

import datetime as dt
import time

import numpy as np
import pandas as pd

# Algo, scaleFeatures, writeResults, f1_scores, print_stats and DATE_FORMAT
# are assumed to come from elsewhere in this module
def main(argv):

    # Time the analysis:
    tic = time.time()

    # Get commandline arguments
    try:
        infile = argv[1]
        granularity = int(argv[2])
        training_window = int(argv[3])
        forecasting_interval = int(argv[4])
    except (IndexError, ValueError):
        raise RuntimeError("usage: python "
                           + argv[0]
                           + " <infile>"
                           + " <granularity>"
                           + " <training_window>"
                           + " <forecasting_interval>")

    print("\nStarting analysis on %s with settings %d %d %d..."
          % (infile, granularity, training_window, forecasting_interval))

    # Generate the outfile name based on the infile name
    base = infile[:-4] if infile.endswith('.csv') else infile
    outfile = base + '.results.csv'

    # Separate feature timestamps, feature data, and power data
    dataframe = pd.read_csv(infile)
    timestamps = dataframe.iloc[:, 0].values
    targets = dataframe.iloc[:, -1].values

    features_df = dataframe.iloc[:, 1:-1]

    # Filter features with little or no variance
    print("\nRemoving features due to low variance:")
    for column in features_df.columns:
        values = features_df[column].values
        if values.max() == values.min():
            features_df = features_df.drop(columns=[column])
            print(column)

    #dataframe = dataframe.drop(columns=['Audio Sensor'])
    #dataframe = dataframe.drop(columns=['Luminescence_11 (Lux)'])
    
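    # NOTE: this hard-coded selection overrides the variance filter above,
    # so only these two features are actually used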
    features_df = features_df[['Audio Sensor', 
                               'Temperature_10 (C)']]

    print("\nThe following features will be used:")
    for column_name in features_df.columns:
        print(column_name)

    features = features_df.values
    features = scaleFeatures(features)
    num_features = features.shape[1]
    
    # Algorithm settings
    algo = Algo(granularity, training_window, forecasting_interval, num_features)
    
    # Output lists
    y_time = ['Timestamp']
    y_target = ['Target']
    y_predict = ['Prediction']
    anomalies = ['Anomaly']
    
    print("Algorithm settings:")
    
    # EWMA additions
    # alpha is adjustable on a scale of (0, 1]
    # The smaller the value of alpha, the more averaging takes place;
    # a value of 1.0 means no averaging happens
    #alpha = float(input('Enter value of alpha: '))
    algo.setEMAParameter(alpha=1.0)
    #algo.setEMAParameter(alpha=0.75)
    #algo.setEMAParameter(alpha=0.5)
    #algo.setEMAParameter(alpha=0.25)
    
    # Recommended severity parameters from the paper
    #algo.setSeverityParameters(w=0.53, L=3.714) # Most sensitive
    #algo.setSeverityParameters(w=0.84, L=3.719) # Medium sensitivity
    #algo.setSeverityParameters(w=1.00, L=3.719) # Least sensitive
    algo.setSeverityParameters(w=1, L=2) # Custom sensitivity
    
    # Cap the maximum prediction value (presumably to suppress runaway forecasts)
    algo.setMaxPrediction(500)
    
    # Used for the F1 calculation
    detected = set()
    ground_truth = set()
    
    #==================== ANALYSIS ====================#
    print("\nBeginning analysis...")
    count = 0
    for row in features:

        # Update the user with the current timestamp
        cur_time = timestamps[count]
        if count % 360 == 0:
            cur_dt = dt.datetime.fromtimestamp(cur_time)
            print("Trying time %s" % cur_dt.strftime(DATE_FORMAT))
        
        row = np.append(row, targets[count])
        target, prediction = algo.run(row) # Magic!
        
        # If there is a prediction, check for anomalies
        if prediction is not None:
            y_time.append(cur_time)
            y_target.append(target)
            y_predict.append(float(prediction))
            
            if algo.checkSeverity(target, float(prediction)):
                detected.add(cur_time)
                anomalies.append(1)
            else:
                anomalies.append(0)

            # NOTE: If you plan on injecting attacks, you can add the
            # timestamps here to create a "ground truth" set, which can then
            # be used to calculate the accuracy, precision and F1 scores.
            #if cur_time >= 1338696000 and cur_time <= 1338699600:
            #    ground_truth.add(cur_time)
            #elif cur_time >= 1338955200 and cur_time <= 1338958800:
            #    ground_truth.add(cur_time)
            #elif cur_time >= 1339128000 and cur_time <= 1339131600:
            #    ground_truth.add(cur_time)

        # Advance to the next input row
        count += 1

    #==================== RESULTS ====================#

    # Save data for later graphing
    results = y_time, y_target, y_predict, anomalies
    writeResults(outfile, results)
    #f1_scores(detected, ground_truth) # Uncomment to show F1 scores
    print_stats(y_target[1:], y_predict[1:]) # Skip the header row
    print("Run time:", time.time() - tic)
    print("Ending analysis. See %s for results." % outfile)
    
    return results
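
A plausible sketch of the project-local scaleFeatures helper, assuming simple
per-column min-max scaling to [0, 1] (an assumption; the actual implementation
is not shown in these excerpts):

import numpy as np

def scaleFeatures(features):
    # Min-max scale each column to [0, 1]; constant columns map to 0
    col_min = features.min(axis=0)
    col_range = features.max(axis=0) - col_min
    col_range[col_range == 0] = 1.0 # avoid division by zero
    return (features - col_min) / col_range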