Example #1
    def printStats(self):
        data = []
        knn_class = KNN()
        sleep = 1  # sampling interval, in seconds
        # opcounters and their CPU-time counterparts
        q = i = u = d = 0
        qcpu = icpu = ucpu = dcpu = 0
        ii = 0  # sample counter, used to reprint the header periodically
        con = 0  # current connection count
        hostname = "localhost"
        # index counters: accesses, hits, misses
        idx_b_a = idx_b_h = idx_b_m = 0
        # cumulative network counters and their per-interval deltas
        new_bytesIn = new_bytesOut = new_numRequests = 0
        bytesIn = bytesOut = numRequests = 0
        network_skip_flag = 0  # no delta is possible on the first sample

        # just run forever until ctrl-c
        while True:
            # ask each iteration whether this sample is normal training
            # data, anomaly training data, or a test point
            do_normal_train = raw_input('Do normal training?: ') == 'y'
            do_anomaly_train = raw_input('Do anomaly training?: ') == 'y'
            do_test = raw_input('Do testing?: ') == 'y'
            # set previous values before overwriting
            pq = q
            pi = i
            pu = u
            pd = d
            pqcpu = qcpu
            picpu = icpu
            pucpu = ucpu
            pdcpu = dcpu
            pidx_b_a = idx_b_a
            pidx_b_h = idx_b_h
            pidx_b_m = idx_b_m
            
            # fetch the stats from serverStatus
            data = self.db.command({"serverStatus": 1})

            res = int(data['mem']['resident'])
            vir = int(data['mem']['virtual'])
            mapd = int(data['mem']['mapped'])

            old_bytesIn = new_bytesIn  
            old_bytesOut = new_bytesOut
            old_numRequests = new_numRequests 

            new_bytesIn = int(data['network']['bytesIn'])
            new_bytesOut = int(data['network']['bytesOut'])
            new_numRequests = int(data['network']['numRequests'])

            if network_skip_flag == 0:
                # first sample: no previous values to diff against
                network_skip_flag = 1
            else:
                bytesIn = new_bytesIn - old_bytesIn
                bytesOut = new_bytesOut - old_bytesOut
                numRequests = new_numRequests - old_numRequests


            template="%12s%22s%12s%12s%12s%12s"
            header=('hostname', 'time', 'resident','virtual', 'mapped', 'load', 'bytesIn', 'bytesOut', 'numRequests')
            datastr="hostname, self.thetime(),  res, vir, mapd, self.getload(), bytesIn, bytesOut, numRequests"
            point = (0, 0, 0, 0, 0, res, vir, mapd, 0, 0, 0, 0, self.getload(), bytesIn, bytesOut, numRequests)

            if "opcounters" in data:
                q = int(data['opcounters']['query'])
                i = int(data['opcounters']['insert'])
                u = int(data['opcounters']['update'])
                d = int(data['opcounters']['delete'])
                # CPU-time opcounters may be absent; fall back to zero
                try:
                    qcpu = int(data['opcounters']['queryCpuTime'])
                    icpu = int(data['opcounters']['insertCpuTime'])
                    ucpu = int(data['opcounters']['updateCpuTime'])
                    dcpu = int(data['opcounters']['deleteCpuTime'])
                except KeyError:
                    qcpu = icpu = ucpu = dcpu = 0
                con = int(data['connections']['current'])
              
                template="%12s%22s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s"
                header=('hostname', 'time', 'query', 'insert', 'update',  \
                        'delete', 'active con', 'resident', \
                        'virtual','mapped','load', 'bytesIn', 'bytesOut', 'numRequests', \
                        'queryCpu', 'insertCpu', 'updateCpu', 'deleteCpu')
                datastr="hostname, self.thetime(), (q-pq)/sleep, (i-pi)/sleep,(u-pu)/sleep, (d-pd)/sleep, con,  res, vir, mapd, self.getload(), bytesIn, bytesOut, numRequests, (qcpu-pqcpu)/sleep, (icpu-picpu)/sleep, (ucpu-pucpu)/sleep, (dcpu-pdcpu)/sleep"
                point = ((q-pq)/sleep, (i-pi)/sleep,(u-pu)/sleep, (d-pd)/sleep, con, res, vir, mapd, 0, 0, 0, 0, self.getload(), bytesIn, bytesOut, numRequests, (qcpu-pqcpu)/sleep, (icpu-picpu)/sleep, (ucpu-pucpu)/sleep, (dcpu-pdcpu)/sleep)

            # if indexCounters is present, opcounters is as well, so the
            # block above has already run
            if "indexCounters" in data:
                # newer servers expose these fields at the top level of
                # indexCounters rather than under a 'btree' sub-document
                idx_b_a = int(data['indexCounters']['accesses'])
                idx_b_h = int(data['indexCounters']['hits'])
                idx_b_m = int(data['indexCounters']['misses'])
                idx_b_o = round(float(data['indexCounters']['missRatio']), 2)
                template="%12s%22s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s"
                header=('hostname', 'time', 'query', 'insert', 'update',  \
                        'delete', 'active con', 'resident', \
                        'virtual','mapped','idx acc','idx hit','idx miss','idx ratio','load', 'bytesIn', 'bytesOut', 'numRequests', \
                        'queryCpu', 'insertCpu', 'updateCpu', 'deleteCpu')
                datastr="hostname, self.thetime(), (q-pq)/sleep, (i-pi)/sleep,(u-pu)/sleep, (d-pd)/sleep, \
                         con,  res, vir, mapd, (idx_b_a-pidx_b_a)/sleep, (idx_b_h-pidx_b_h)/sleep, (idx_b_m-pidx_b_m)/sleep, idx_b_o, self.getload(), bytesIn, bytesOut, numRequests, (qcpu-pqcpu)/sleep, (icpu-picpu)/sleep, (ucpu-pucpu)/sleep, (dcpu-pdcpu)/sleep"
                point = ((q-pq)/sleep, (i-pi)/sleep,(u-pu)/sleep, (d-pd)/sleep, con, res, vir, mapd, (idx_b_a-pidx_b_a)/sleep, (idx_b_h-pidx_b_h)/sleep, (idx_b_m-pidx_b_m)/sleep, idx_b_o, self.getload(), bytesIn, bytesOut, numRequests, (qcpu-pqcpu)/sleep, (icpu-picpu)/sleep, (ucpu-pucpu)/sleep, (dcpu-pdcpu)/sleep)

            if do_normal_train:
                knn_class.trainSet.append({point: 'Normal'})
                knn_class.size_normal_train += 1
            if do_anomaly_train:
                knn_class.trainSet.append({point: 'Anomaly'})
                knn_class.size_anomaly_train += 1
            if do_test:
                label = knn_class.getLabel(point)

            # reprint the header every 25 samples
            if ii % 25 == 0:
                print template % header
            if do_test:  # in test mode, append the predicted label
                print template % eval(datastr), label
            else:
                print template % eval(datastr)

            ii += 1
            
            time.sleep(sleep) 
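
Both examples drive a small classifier object whose implementation is not shown. A minimal sketch of the interface the calls assume (trainSet, size_normal_train, size_anomaly_train, and getLabel are taken from the usage above; the Euclidean-distance voting logic is a hypothetical stand-in, not the original implementation):

    import math

    class KNN(object):
        """Hypothetical stand-in matching the calls made in the examples."""
        def __init__(self, k=5):
            self.k = k
            self.trainSet = []  # list of one-entry {point_tuple: label} dicts
            self.size_normal_train = 0
            self.size_anomaly_train = 0

        def getLabel(self, point):
            # rank every training point by Euclidean distance to the query
            scored = []
            for entry in self.trainSet:
                for train_point, label in entry.items():
                    dist = math.sqrt(sum((a - b) ** 2
                                         for a, b in zip(train_point, point)))
                    scored.append((dist, label))
            scored.sort()
            # majority vote among the k nearest neighbors
            nearest = [label for _, label in scored[:self.k]]
            return max(set(nearest), key=nearest.count)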
Example #2
    def Detection(self):
        # db_type is assumed to be an instance attribute, like the other
        # settings used below; bail out unless a supported type was chosen
        if self.db_type not in ('mongodb', '1', 'redis', '2'):
            return

        if self.algorithm == 'knn' or self.algorithm == '1':
            print("K nearest neighbor algorithm")
            detection_algorithm = KNN()
        elif self.algorithm == 'perceptron' or self.algorithm == '2':
            detection_algorithm = Perceptron()
            self.do_perceptron_learn = True  # train only once, before testing
        else:
            print('Invalid algorithm selection')
            return
    
        have_batchfile = len(self.batchfile) != 0
        if have_batchfile:
            batchex = BatchExecutor(self.batchfile, detection_algorithm,
                                    self.do_perceptron_learn,
                                    hostname=self.hostname, port=27017)
            batchex.start()

        metric_groups = self.metric_groups
        counter_keys = self.counter_keys

        data = []
        new_metrics = {}
        old_metrics = {}
        anomaly_metrics = {}
        sleep = self.sleep_interval
        fp = open('./out.txt', 'a+')

        # just run forever until ctrl-c (in non-batch mode) or run until the
        # batch executor finishes (in batch mode)
        while True:
            if have_batchfile:
                allDone, duration = batchex.wait_for_measure_to_be_ready_all_done_or_failed()
                if allDone:
                    break
                if duration is None:
                    print("FATAL ERROR: Batch execution failed!")
                    break
            else:
                traintest, duration = self.getTrainOrTest()

            # train/test for a set duration; -1 means run until ctrl-c
            if duration == -1:
                print("Running forever")
                forever = 1
            else:
                if duration == 0.0:
                    raise ValueError("invalid duration")
                print("Running for {} seconds".format(duration))
                forever = 0

            ii = 0
            # fetch the metrics
            data = self.getData()
    
            # This first fetch only primes the metric dictionaries: we care
            # about changes in the counter values between samples, not their
            # running totals.

            # put all the new metrics in new_metrics
            for metric_group, items in metric_groups.items():
                #If the item is not a list, then take it straight from data
                if not items:
                    try:
                        new_metrics[metric_group] = float(data[metric_group])
                    except KeyError:
                        pass
                else:
                    #set to 0 so that we can recalculate the aggregate values

                    #this is for resetting the values on the subsequent
                    #iteration (e.g. testing -> training)
                    if (metric_group in new_metrics and 
                        metric_group not in data):
                        new_metrics[metric_group] = 0
                        anomaly_metrics[metric_group] = 0

                    #iterate over the list of items
                    for item in items:
                        #if the metric_group is in data, then its items will be
                        #as well
                        if metric_group in data:
                            try:
                                new_metrics[metric_group + item] = float(data[metric_group][item])
                                anomaly_metrics[metric_group + item] = float(data[metric_group][item])
                            except KeyError:
                                pass
                        #if the metric_group isn't in data, but its items are
                        #then aggregate all of the items into the metric_group
                        #This happens in Redis to aggregate all types of
                        #commands together
                        elif item in data:
                            if metric_group not in new_metrics:
                                new_metrics[metric_group] = 0
                                anomaly_metrics[metric_group] = 0
                            try:
                                new_metrics[metric_group] += float(data[item]['calls'])
                                anomaly_metrics[metric_group] += float(data[item]['calls'])
                            except KeyError:
                                pass

            while duration > 0 or forever == 1:
                time.sleep(sleep)
                duration -= sleep
                point = ()

                
                # fetch the metrics
                data = self.getData()

                #put all the new metrics in new_metrics
                for metric_group, items in metric_groups.items():
                    #set old to new so that we can take the difference
                    #between the two measurements 
                    if metric_group in new_metrics:
                        old_metrics[metric_group] = new_metrics[metric_group]
                        new_metrics[metric_group] = 0
                        anomaly_metrics[metric_group] = 0

                    #set to 0 so that we can recalculate the aggregate values
                    #If the item is not a list, then take it straight from data
                    if not items:
                        try:
                            new_metrics[metric_group] = float(data[metric_group])
                            anomaly_metrics[metric_group] = float(data[metric_group])
                        except KeyError:
                            pass

                    else:
                        #iterate over the list of items
                        for item in items:
                            #if the metric_group is in data, then its items 
                            #will be as well
                            if metric_group in data:
                                try:
                                    old_metrics[metric_group + item] = new_metrics[metric_group + item]
                                    new_metrics[metric_group + item] = float(data[metric_group][item])
                                    anomaly_metrics[metric_group + item] = float(data[metric_group][item])
                                except KeyError:
                                    pass
                            #if the metric_group isn't in data, but its items 
                            #are then aggregate all of the items into the 
                            #metric_group
                            #This happens in Redis to aggregate all types of 
                            #commands together
                            elif item in data:
                                try:
                                    new_metrics[metric_group] += float(data[item]['calls'])
                                    anomaly_metrics[metric_group] += float(data[item]['calls'])
                                except KeyError:
                                    pass


                #make per second values for the counters
                for counter_group, items in counter_keys.items():
                    #if the item is not a list, then we can just subtract the
                    #entire counter group. This is in Redis where we aggregate
                    #all command types together
                    if not items:
                        if counter_group in new_metrics:
                            try:
                                anomaly_metrics[counter_group] = (new_metrics[counter_group] - old_metrics[counter_group]) / sleep
                            except KeyError:
                                pass
                    else:
                        #iterate over all items in the list
                        for item in items:
                            if counter_group in data:
                                try:
                                    anomaly_metrics[counter_group+item] = (new_metrics[counter_group+item] - old_metrics[counter_group+item]) / sleep
                                except KeyError:
                                    pass
            
                # build the feature tuple from the anomaly_metrics dictionary
                # (feature order follows dict iteration order, which is only
                # stable within a single process run)
                for key in anomaly_metrics:
                    point += (anomaly_metrics[key],)
                    sys.stdout.write("{}, {}\n".format(key, anomaly_metrics[key]))
                    if not ii % 100000:
                        fp.write("{}, ".format(key))
                sys.stdout.write("\n")

                if not ii % 50:
                    print(ii)
                if not ii % 100000:
                    fp.write("\n")
                ii += 1

                fp.write("{}\n".format(point))

                if have_batchfile:
                    batchex.signal_measuring_done(point, duration)
                elif traintest == '1':
                    detection_algorithm.trainSet.append({point:'Normal'})
                    detection_algorithm.size_normal_train += 1
                elif traintest == '2':
                    detection_algorithm.trainSet.append({point:'Anomaly'})
                    detection_algorithm.size_anomaly_train += 1
                elif traintest == '3':
                    # perceptron training happens once, right before testing
                    if self.do_perceptron_learn:
                        detection_algorithm.preProcess()
                        self.do_perceptron_learn = False
                    label = detection_algorithm.getLabel(point)
                    if label == 'Normal' or label == 0:
                        print('Normal')
                    elif label == 'Anomaly' or label == 1:
                        print('Anomaly')
                    print('\n')

                fp.flush()
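
The transformation both examples repeat is turning cumulative server counters into per-second rates by differencing consecutive samples. A self-contained sketch of that pattern, assuming only that fetch_stats returns a flat dict of cumulative counter values (the helper name and structure are illustrative, not part of the original code):

    import time

    def counter_rates(fetch_stats, keys, interval=1):
        """Yield per-second rates for the given cumulative counters."""
        prev = fetch_stats()
        while True:
            time.sleep(interval)
            cur = fetch_stats()
            # difference consecutive samples and normalize by the interval
            yield dict((k, (cur[k] - prev[k]) / float(interval))
                       for k in keys if k in cur and k in prev)
            prev = cur

For MongoDB, fetch_stats could wrap self.db.command({"serverStatus": 1}) and flatten the opcounters sub-document before returning it; for Redis the values would come from INFO, as the aggregation branches above suggest.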