Example #1
0
    def startAnalysis(self):

        # Use filename from attackEdit instead of inputEdit if possible
        if len(self.attackList) > 0: 
            infile = str(self.attackEdit.text())
        else:
            infile = str(self.inputEdit.text())
        outfile = str(self.resultsEdit.text())
        granularity = 1
        trainingWin = 24
        forecastingInterval = 1

        print ("\nStarting analysis on %s with settings %d %d %d..." 
               % (infile, granularity, trainingWin, forecastingInterval))
               
        # Get list of features (first column is time)
        csv_file = open(infile, 'rb')
        reader = csv.reader(csv_file)
        columns = reader.next()[1:]
        
        print "The following features were found:", columns
                
        # Algorithm settings
        algo = Algo(granularity, trainingWin, forecastingInterval, len(columns)-1)
        algo.setEMAParameter(alpha=self.emaSpin.value())
        algo.setSeverityParameters(w=self.severitySpinW.value(),
                                   L=self.severitySpinL.value())
        
        y_time = ['Timestamp']
        y_target = ['Target']
        y_predict = ['Prediction']
        anomalies = ['Anomaly']

        detected = set()
        ground_truth = set()
        
        first = True
        
        print "Beginning analysis..."
        loadingWin = LoadingWindow()
        self.mainWidget.setEnabled(False)
        count = 0
        for line in reader:

            # Read new data from file
            cur_time = float(line[0])
            new_data = np.asarray(line[1:], np.float)
            target, prediction = algo.run(new_data)  # run the algorithm on the new reading to get the target value and its prediction
            
            if prediction is not None:
                y_time.append(cur_time)
                y_target.append(target)
                y_predict.append(float(prediction))
                
                if algo.checkSeverity(target, float(prediction)):
                    detected.add(cur_time)
                    
                    anomalies.append(1)
                else:
                    anomalies.append(0)

            cur_datetime = dt.datetime.fromtimestamp(cur_time)
            for attack in self.attackList:
                if attack.start <= cur_datetime < attack.end:
                    ground_truth.add(cur_time)
                    break
                    
            if (count % 60) == 0:
                #print "Trying time: ", cur_time
                QtGui.QApplication.processEvents()
            count += 1
            
             
        # Close the input file and save results
        csv_file.close()
        writeResults(outfile, (y_time, y_target, y_predict, anomalies))
        f1_scores(detected, ground_truth)
        print_stats(y_target[1:], y_predict[1:]) #Remove header
        print "Ending analysis. See %s for results." % outfile

        self.mainWidget.setEnabled(True)
        loadingWin.close()
Example #2
0
def main(argv):

    # Retrieve settings from JSON settings file
    with open(SMART_DRIVER) as driver:
        jsonDataFile = json.load(driver)

    granularity = int(jsonDataFile['granularity'])
    training_window = int(jsonDataFile['windowSize'])
    forecasting_interval = int(jsonDataFile['forecastingInterval'])
    # Output CSV path; 'outfile' is used below but never defined in the original
    # snippet, so the settings key used here is an assumption
    outfile = jsonDataFile.get('outfile', 'results.csv')
    
    print ("\nStarting analysis on database with settings %d %d %d..." 
           % (granularity, training_window, forecasting_interval))
           
    granularity_in_seconds = granularity * 60
           
    # Initialize database
    database = Database(DB_CONFIG)
           
    # Get the list of feature numbers
    id_list = getListIDs(jsonDataFile["idSelection"])

    id_list = list(set(id_list)) # Remove duplicates
    id_list.sort()

    # Determine the range of times to pull data from    
    # If the user specified a timeframe, use that
    if int(jsonDataFile["specifyTime"]):
        start_time = dt.datetime.strptime(jsonDataFile["beginTime"], DATE_FORMAT)
        end_time = dt.datetime.strptime(jsonDataFile["endTime"], DATE_FORMAT)

    # Otherwise, find the largest timeframe for which each feature has data
    else:
        start_time, end_time = getStartEndTimes(id_list)

    print "Start, end: ", start_time, end_time
        
    # Get the list of column headers for the features
    columns = []
    for feature_id in id_list:
        columns.append(jsonDataFile['data'][feature_id - 1]['columnName'])
        
    columns.append(jsonDataFile['totalConsum'])
   
    #print "The following features were found:", columns

    # Algorithm settings
    algo = Algo(granularity, training_window, forecasting_interval, len(columns)-1)
    
    # Output lists
    y_time = ['Timestamp']
    y_target = ['Target']
    y_predict = ['Prediction']
    anomalies = ['Anomaly']
    
    count = 0
    
    # EWMA additions
    # alpha is adjustable on a scale of (0, 1]
    # The smaller value of alpha, the more averaging takes place
    # A value of 1.0 means no averaging happens
    #alpha = float(raw_input('Enter Value of alpha:'))
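    # For reference (general EWMA formula, not specific to this Algo class):
    #   smoothed_t = alpha * x_t + (1 - alpha) * smoothed_(t-1)
    # e.g. with alpha = 0.7, a previous smoothed value of 10.0 and a new reading of 20.0:
    #   0.7 * 20.0 + 0.3 * 10.0 = 17.0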
    algo.setEMAParameter(alpha=1.0)
    #algo.setEMAParameter(alpha=0.7)
    
    # Recommended severity parameters from the paper
    #algo.setSeverityParameters(w=0.53, L=3.714) # Most sensitive
    #algo.setSeverityParameters(w=0.84, L=3.719) # Medium sensitive
    #algo.setSeverityParameters(w=1.00, L=3.719) # Least sensitive 
     
    algo.setSeverityParameters(w=1, L=3.719) # Custom sensitivity
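    # Context (assumption): in a standard EWMA control chart, w plays the role of the
    # smoothing weight (lambda) and L the limit width, giving control limits of roughly
    #   mean +/- L * sigma * sqrt(w / (2 - w))
    # How checkSeverity actually uses w and L is not shown in this snippet.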

    detected = set()
    ground_truth = set()
    
    #==================== ANALYSIS ====================#
    print "Beginning analysis..."
    while start_time < end_time:

        # FOR SMART* ONLY
        # Some of the data seems bad on the 31st - too many NULLS
        if (start_time > dt.datetime(2012, 5, 30) and 
            start_time < dt.datetime(2012, 6, 1)):
            
            start_time = dt.datetime(2012, 6, 1)

        if(count % 240 == 0):
            print "trying time: %s " % start_time
            
        count += 1

        #Execute the query:
        stop_time = start_time + dt.timedelta(0, granularity_in_seconds)
        new_data = database.get_avg_data(start_time, stop_time, columns)
        new_data = np.asarray([max(0, data) for data in new_data]) # remove 'nan' and negative
        target, prediction = algo.run(new_data)  # run the algorithm on the new reading to get the target value and its prediction
        
        if prediction is not None:
            y_time.append(start_time)
            y_target.append(target)
            y_predict.append(float(prediction))
            
            if algo.checkSeverity(target, float(prediction)):
                detected.add(start_time)
                anomalies.append(1)
            else:
                anomalies.append(0)

        start_time = stop_time #Increment and loop


    #==================== RESULTS ====================#
    # Save data for later graphing
    results = y_time, y_target, y_predict, anomalies
    writeResults(outfile, results)
    f1_scores(detected, ground_truth)  # note: ground_truth is never populated in this example
    print_stats(y_target[1:], y_predict[1:]) #Remove header
    print "Ending analysis. See %s for results." % outfile
    
    return results
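
Neither example includes the f1_scores helper they call; below is a minimal sketch of how
precision, recall and F1 could be computed from the detected and ground_truth timestamp
sets (an assumed implementation; the real helper may differ, e.g. by tolerating small
time offsets between a detection and the labelled attack window):

def f1_scores(detected, ground_truth):
    # True positives are detections that fall on labelled anomaly timestamps
    tp = len(detected & ground_truth)
    fp = len(detected - ground_truth)    # false alarms
    fn = len(ground_truth - detected)    # missed anomalies

    precision = tp / float(tp + fp) if (tp + fp) else 0.0
    recall = tp / float(tp + fn) if (tp + fn) else 0.0
    f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) else 0.0

    print("Precision: %.3f  Recall: %.3f  F1: %.3f" % (precision, recall, f1))
    return precision, recall, f1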