def printStats(self):
    """Interactively collect MongoDB serverStatus metrics once per second,
    optionally feeding each sample into a KNN classifier as 'Normal' or
    'Anomaly' training data, or classifying it in test mode.

    Runs forever until Ctrl-C.  Python 2 code (raw_input, print statements).
    Assumes self.db is a pymongo database handle and that self.thetime()
    and self.getload() return printable time/load values — TODO confirm
    against the rest of the class.
    """
    data = []
    knn_class = KNN()
    # polling interval in seconds; also used as the divisor when turning
    # cumulative counters into per-second rates
    sleep = 1
    # cumulative opcounters from the current sample
    q = 0
    i = 0
    u = 0
    d = 0
    # cumulative per-op CPU-time counters (only present on patched servers)
    qcpu = 0
    icpu = 0
    ucpu = 0
    dcpu = 0
    ii = 0          # iteration count, used to reprint the header every 25 rows
    con = 0
    hostname = "localhost"
    # cumulative index (btree) counters from the current sample
    idx_b_a = 0
    idx_b_h = 0
    idx_b_m = 0
    # cumulative network counters; bytesIn/bytesOut/numRequests hold the
    # per-interval deltas derived from them
    new_bytesIn = 0
    new_bytesOut = 0
    new_numRequests = 0
    bytesIn = 0
    bytesOut = 0
    numRequests = 0
    # the first sample has no previous value to diff against, so skip it
    network_skip_flag = 0
    # just run forever until ctrl-c
    while True:
        # ask every iteration whether this sample is normal/anomaly
        # training data and/or should be classified
        do_normal_train = raw_input('Do normal training?: ')
        do_anomaly_train = raw_input('Do anomaly training?: ')
        do_test = raw_input('Do testing?: ')
        if do_normal_train == 'y':
            do_normal_train = True
        else:
            do_normal_train = False
        if do_anomaly_train == 'y':
            do_anomaly_train = True
        else:
            do_anomaly_train = False
        if do_test == 'y':
            do_test = True
        else:
            do_test = False
        # set previous values before overwriting
        pq = q
        pi = i
        pu = u
        pd = d
        pqcpu = qcpu
        picpu = icpu
        pucpu = ucpu
        pdcpu = dcpu
        pidx_b_a = idx_b_a
        pidx_b_h = idx_b_h
        pidx_b_m = idx_b_m
        # fetch the stats
        data = ( self.db.command( { "serverStatus" : 1 } ) )
        #print data['indexCounters'];sys.exit()
        # memory stats are always present
        res = int(data['mem']['resident'])
        vir = int(data['mem']['virtual'])
        mapd = int(data['mem']['mapped'])
        # rotate network counters so we can compute per-interval deltas
        old_bytesIn = new_bytesIn
        old_bytesOut = new_bytesOut
        old_numRequests = new_numRequests
        new_bytesIn = int(data['network']['bytesIn'])
        new_bytesOut = int(data['network']['bytesOut'])
        new_numRequests = int(data['network']['numRequests'])
        if(network_skip_flag == 0):
            # first sample: no previous value, leave deltas at 0
            network_skip_flag = 1
        else:
            bytesIn = new_bytesIn - old_bytesIn
            bytesOut = new_bytesOut - old_bytesOut
            numRequests = new_numRequests - old_numRequests
        # minimal output when neither opcounters nor indexCounters exist.
        # NOTE(review): this template has 6 format specs but header has 9
        # items, so `print template % header` would raise TypeError if this
        # fallback path is ever hit — confirm whether it is reachable.
        template="%12s%22s%12s%12s%12s%12s"
        header=('hostname', 'time', 'resident','virtual', 'mapped', 'load', 'bytesIn', 'bytesOut', 'numRequests')
        # datastr is eval()'d below to build the printed row.
        # NOTE(review): eval of a fixed literal is safe but unidiomatic;
        # consider replacing with a plain tuple.
        datastr="hostname, self.thetime(), res, vir, mapd, self.getload(), bytesIn, bytesOut, numRequests"
        # feature vector handed to the classifier; zeros pad the slots that
        # this metric level cannot fill
        point = (0, 0, 0, 0, 0, res, vir, mapd, 0, 0, 0, 0, self.getload(), bytesIn, bytesOut, numRequests)
        if "opcounters" in data:
            q = int(data['opcounters']['query'])
            i = int(data['opcounters']['insert'])
            u = int(data['opcounters']['update'])
            d = int(data['opcounters']['delete'])
            # CPU-time opcounters only exist on instrumented servers;
            # fall back to 0 when absent
            try:
                qcpu = int(data['opcounters']['queryCpuTime'])
                icpu = int(data['opcounters']['insertCpuTime'])
                ucpu = int(data['opcounters']['updateCpuTime'])
                dcpu = int(data['opcounters']['deleteCpuTime'])
            except KeyError:
                qcpu = 0
                icpu = 0
                ucpu = 0
                dcpu = 0
            con = int(data['connections']['current'])
            # 18-column layout: ops are reported as per-second rates
            # ((x - px) / sleep)
            template="%12s%22s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s"
            header=('hostname', 'time', 'query', 'insert', 'update',
                    'delete', 'active con', 'resident',
                    'virtual','mapped','load', 'bytesIn', 'bytesOut', 'numRequests',
                    'queryCpu', 'insertCpu', 'updateCpu', 'deleteCpu')
            datastr="hostname, self.thetime(), (q-pq)/sleep, (i-pi)/sleep,(u-pu)/sleep, (d-pd)/sleep, con, res, vir, mapd, self.getload(), bytesIn, bytesOut, numRequests, (qcpu-pqcpu)/sleep, (icpu-picpu)/sleep, (ucpu-pucpu)/sleep, (dcpu-pdcpu)/sleep"
            point = ((q-pq)/sleep, (i-pi)/sleep,(u-pu)/sleep, (d-pd)/sleep, con, res, vir, mapd, 0, 0, 0, 0, self.getload(), bytesIn, bytesOut, numRequests, (qcpu-pqcpu)/sleep, (icpu-picpu)/sleep, (ucpu-pucpu)/sleep, (dcpu-pdcpu)/sleep)
        # opcounters will be in data if indexcounters is
        if "indexCounters" in data:
            #idx_b_a = int(data['indexCounters']['btree']['accesses'])
            #idx_b_h = int(data['indexCounters']['btree']['hits'])
            #idx_b_m = int(data['indexCounters']['btree']['misses'])
            #idx_b_o = round(float(data['indexCounters']['btree']['missRatio']),2)
            idx_b_a = int(data['indexCounters']['accesses'])
            idx_b_h = int(data['indexCounters']['hits'])
            idx_b_m = int(data['indexCounters']['misses'])
            idx_b_o = round(float(data['indexCounters']['missRatio']),2)
            # 22-column layout: previous columns plus index access/hit/miss
            # rates and miss ratio
            template="%12s%22s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s"
            header=('hostname', 'time', 'query', 'insert', 'update',
                    'delete', 'active con', 'resident',
                    'virtual','mapped','idx acc','idx hit','idx miss','idx ratio','load', 'bytesIn', 'bytesOut', 'numRequests',
                    'queryCpu', 'insertCpu', 'updateCpu', 'deleteCpu')
            datastr="hostname, self.thetime(), (q-pq)/sleep, (i-pi)/sleep,(u-pu)/sleep, (d-pd)/sleep, con, res, vir, mapd, (idx_b_a-pidx_b_a)/sleep, (idx_b_h-pidx_b_h)/sleep, (idx_b_m-pidx_b_m)/sleep, idx_b_o, self.getload(), bytesIn, bytesOut, numRequests, (qcpu-pqcpu)/sleep, (icpu-picpu)/sleep, (ucpu-pucpu)/sleep, (dcpu-pdcpu)/sleep"
            point = ((q-pq)/sleep, (i-pi)/sleep,(u-pu)/sleep, (d-pd)/sleep, con, res, vir, mapd, (idx_b_a-pidx_b_a)/sleep, (idx_b_h-pidx_b_h)/sleep, (idx_b_m-pidx_b_m)/sleep, idx_b_o, self.getload(), bytesIn, bytesOut, numRequests, (qcpu-pqcpu)/sleep, (icpu-picpu)/sleep, (ucpu-pucpu)/sleep, (dcpu-pdcpu)/sleep)
        # training: label this sample and grow the training set
        if do_normal_train:
            knn_class.trainSet.append({point:'Normal'})
            knn_class.size_normal_train += 1
        if do_anomaly_train:
            knn_class.trainSet.append({point:'Anomaly'})
            knn_class.size_anomaly_train += 1
        if do_test:
            #print point
            label = knn_class.getLabel(point)
        # reprint the column header every 25 rows
        if (ii % 25 == 0):
            print template % header
        if do_test:
            # This is for testing, we print out the predicted label
            print template % (eval(datastr)), label
        else:
            print template % (eval(datastr))
        ii += 1
        time.sleep(sleep)
def Detection(self):
    """Main anomaly-detection loop.

    Selects a detection algorithm (KNN or Perceptron) from self.algorithm,
    then repeatedly samples database metrics via self.getData(), converts
    counter metrics to per-second rates, and either trains the algorithm,
    classifies the sample, or hands the feature tuple to a BatchExecutor
    when a batch file was given.  Appends raw points to ./out.txt.

    Python 2 code (print statements).  Relies on a module-level db_type and
    on self.metric_groups / self.counter_keys describing the metric schema
    — presumably {group_name: [item, ...] or empty}; verify against the
    class initializer.
    """
    # only MongoDB ('mongodb'/'1') and Redis ('redis'/'2') are supported
    if (db_type != 'mongodb' and db_type != '1' and db_type != 'redis' and db_type != '2'):
        return
    if self.algorithm == 'knn' or self.algorithm == '1':
        print("K nearest neighbor algorithm")
        detection_algorithm = KNN()
    elif self.algorithm == 'perceptron' or self.algorithm == '2':
        detection_algorithm = Perceptron()
        self.do_perceptron_learn = True  # only do once before the testing!
        #print('Invalid algorithm. SVM not yet supported')
        #return
    else:
        print('Invalid algorithm selection')
        return
    # batch mode: an external BatchExecutor drives the measure/train/test
    # cycle instead of interactive prompts
    have_batchfile = len(self.batchfile) != 0
    if have_batchfile:
        batchex = BatchExecutor(self.batchfile, detection_algorithm, self.do_perceptron_learn, hostname=self.hostname, port=27017)
        batchex.start()
    metric_groups = self.metric_groups
    counter_keys = self.counter_keys
    data = []
    new_metrics = {}      # latest raw sample per metric key
    old_metrics = {}      # previous raw sample, for counter deltas
    anomaly_metrics = {}  # values actually fed into the feature tuple
    sleep = self.sleep_interval
    fp = open('./out.txt', 'a+')
    # just run forever until ctrl-c (in non-batch mode) or run until the
    # batch executor finishes (in batch mode)
    while True:
        if have_batchfile:
            allDone, duration = batchex.wait_for_measure_to_be_ready_all_done_or_failed()
            if allDone:
                break
            if duration == None:
                print("FATAL ERROR: Batch execution failed!")
                break
            # NOTE(review): `forever` is not set on this path; the inner
            # `while duration > 0 or forever == 1` below would raise
            # NameError on the first batch iteration unless `forever` is
            # assigned elsewhere — confirm.
        else:
            traintest, duration = self.getTrainOrTest()
            #train/test for a set duration
            if duration == -1:
                print("Running forever")
                forever = 1
            else:
                if duration == 0.0:
                    raise ValueError("invalid duration")
                print("Running for {} seconds".format(duration))
                forever = 0
        ii = 0
        # fetch the metrics
        data = self.getData()
        #Initial block is to set up old_metrics
        #since we only care about the changes in some values, not the
        # aggregates
        #put all the new metrics in new_metrics
        for metric_group, items in metric_groups.items():
            #If the item is not a list, then take it straight from data
            if not items:
                try:
                    new_metrics[metric_group] = float(data[metric_group])
                except KeyError:
                    pass
            else:
                #set to 0 so that we can recalculate the aggregate values
                #this is for resetting the values on the subsequent
                #iteration (e.g. testing -> training)
                if (metric_group in new_metrics and metric_group not in data):
                    new_metrics[metric_group] = 0
                    anomaly_metrics[metric_group] = 0
                #iterate over the list of items
                for item in items:
                    #if the metric_group is in data, then its items will be
                    #as well
                    if metric_group in data:
                        try:
                            new_metrics[metric_group + item] = float(data[metric_group][item])
                            anomaly_metrics[metric_group + item] = float(data[metric_group][item])
                        except KeyError:
                            pass
                    #if the metric_group isn't in data, but its items are
                    #then aggregate all of the items into the metric_group
                    #This happens in Redis to aggregate all types of
                    #commands together
                    elif item in data:
                        if metric_group not in new_metrics:
                            new_metrics[metric_group] = 0
                            anomaly_metrics[metric_group] = 0
                        try:
                            new_metrics[metric_group] += float(data[item]['calls'])
                            anomaly_metrics[metric_group] += float(data[item]['calls'])
                        except KeyError:
                            pass
        # measurement loop: one sample every `sleep` seconds until the
        # requested duration elapses (or forever)
        while duration > 0 or forever == 1:
            time.sleep(sleep)
            duration -= sleep
            point = ()
            # fetch the metrics
            data = self.getData()
            #put all the new metrics in new_metrics
            for metric_group, items in metric_groups.items():
                #set old to new so that we can take the difference
                #between the two measurements
                if metric_group in new_metrics:
                    old_metrics[metric_group] = new_metrics[metric_group]
                    new_metrics[metric_group] = 0
                    anomaly_metrics[metric_group] = 0
                #set to 0 so that we can recalculate the aggregate values
                #If the item is not a list, then take it straight from data
                if not items:
                    try:
                        new_metrics[metric_group] = float(data[metric_group])
                        anomaly_metrics[metric_group] = float(data[metric_group])
                    except KeyError:
                        pass
                else:
                    #iterate over the list of items
                    for item in items:
                        #if the metric_group is in data, then its items
                        #will be as well
                        if metric_group in data:
                            try:
                                old_metrics[metric_group + item] = new_metrics[metric_group + item]
                                new_metrics[metric_group + item] = float(data[metric_group][item])
                                anomaly_metrics[metric_group + item] = float(data[metric_group][item])
                            except KeyError:
                                pass
                        #if the metric_group isn't in data, but its items
                        #are then aggregate all of the items into the
                        #metric_group
                        #This happens in Redis to aggregate all types of
                        #commands together
                        elif item in data:
                            try:
                                new_metrics[metric_group] += float(data[item]['calls'])
                                anomaly_metrics[metric_group] += float(data[item]['calls'])
                            except KeyError:
                                pass
            #make per second values for the counters
            for counter_group, items in counter_keys.items():
                #if the item is not a list, then we can just subtract the
                #entire counter group. This is in Redis where we aggregate
                #all command types together
                if not items:
                    if counter_group in new_metrics:
                        try:
                            anomaly_metrics[counter_group] = (new_metrics[counter_group] - old_metrics[counter_group]) / sleep
                        except KeyError:
                            pass
                else:
                    #iterate over all items in the list
                    for item in items:
                        if counter_group in data:
                            try:
                                anomaly_metrics[counter_group+item] = (new_metrics[counter_group+item] - old_metrics[counter_group+item]) / sleep
                            except KeyError:
                                pass
            #create a tuple from the anomaly_metrics dictionary
            #yes I know this is a slow and dumb way to do this
            # NOTE(review): feature order follows dict iteration order,
            # which is not guaranteed stable across runs in Python 2 —
            # confirm the classifier tolerates this.
            for items in anomaly_metrics:
                point += (anomaly_metrics[items],)
                sys.stdout.write("{}, {}\n".format(items, anomaly_metrics[items]))
                if not ii % 100000:
                    #sys.stdout.write("{}, ".format(items))
                    # periodically log the metric-key header line to out.txt
                    fp.write("{}, ".format(items))
            sys.stdout.write("\n")
            if not ii % 50:
                print(ii)
            if not ii % 100000:
                #print('\n')
                fp.write("\n")
            ii += 1
            #print anomaly_metrics
            #sys.stdout.write("{}\n".format(point))
            fp.write("{}\n".format(point))
            if have_batchfile:
                # batch mode: hand the point back to the executor
                batchex.signal_measuring_done(point, duration)
            elif traintest == '1':
                # '1' = train as normal traffic
                detection_algorithm.trainSet.append({point:'Normal'})
                detection_algorithm.size_normal_train += 1
            elif traintest == '2':
                # '2' = train as anomalous traffic
                detection_algorithm.trainSet.append({point:'Anomaly'})
                detection_algorithm.size_anomaly_train += 1
            elif traintest == '3':
                #print point
                # '3' = test: perceptron learns once from its training set
                # before the first classification
                if self.do_perceptron_learn == True:
                    detection_algorithm.preProcess()
                    self.do_perceptron_learn = False
                label = detection_algorithm.getLabel(point)
                # KNN returns string labels; Perceptron returns 0/1
                if label == 'Normal' or label == 0:
                    print 'Normal'
                    #fp.write('Normal\n')
                elif label == 'Anomaly' or label == 1:
                    print 'Anomaly'
                    #fp.write('Anomaly\n')
            print('\n')
            fp.flush()