def histoWeighted(matrix):
    """Build a per-key average histogram from a two-column matrix.

    Rows are grouped by the value in column 0. For every distinct key the
    values of column 1 are summed and the number of rows is counted; the
    second column of the result is produced by
    ``NumColumn(sums).scalarDivision(NumColumn(counts))`` (presumably an
    element-wise division, i.e. the mean per key -- confirm against
    NumColumn).

    Args:
        matrix: a ``Matrix`` with at most two columns.

    Returns:
        A new ``Matrix`` with one row per distinct key: the key in the first
        column and the divided sum in the second.

    Errors are reported through ``HandleError.exit``.
    """
    if not isinstance(matrix, Matrix):
        HandleError.exit("In Histogram.histoWeighted: incorrect type: "
                         + str(type(matrix)))
    if matrix.cols > 2:
        HandleError.exit("In Histogram.histoWeighted: matrix cols > 2")

    events = {}
    counts = {}
    for r in range(matrix.rows):
        row = matrix.getRow(r)
        key = row[0]
        # Membership is tested on the dict itself: a hash lookup instead of
        # scanning a freshly built key list on every row.
        if key in events:
            events[key] = events[key] + row[1]
            counts[key] = counts[key] + 1
        else:
            events[key] = row[1]
            counts[key] = 1

    # Freeze the key order once so that keys, sums and counts are guaranteed
    # to line up row by row, and so that plain lists (not dict views) are
    # handed to the column constructors.
    keys = list(events)
    m = Matrix(len(keys), 2)
    m.addColumn(keys)
    sums = NumColumn([events[k] for k in keys])
    occurrences = NumColumn([counts[k] for k in keys])
    m.addColumn(sums.scalarDivision(occurrences))
    return m
def euclideanDistance(self, win2):
    """Return the Euclidean distance between this window and *win2*.

    The distance is the square root of the dot product of the difference of
    the two windows' ``colB`` columns with itself.

    Args:
        win2: the other ``Window``; anything else is reported through
            ``HandleError.exit``.
    """
    other_is_window = isinstance(win2, Window)
    if not other_is_window:
        HandleError.exit("In euclideanDistance: argument is not a window")

    difference = self.colB.substract(win2.colB)
    squared_norm = difference.dotProduct(difference)
    return math.sqrt(squared_norm)
def createHyperSphere(matrix, winSize, winType='SLIDING'):
    """Cut the first two columns of *matrix* into a set of ``Window`` objects.

    Args:
        matrix: object exposing ``getCol(index)`` and ``rows``.
        winSize: number of rows per window.
        winType: ``'REGULAR'`` creates a window at every consecutive start
            row (step 1, overlapping); ``'SLIDING'`` (the default) creates
            ``floor(rows / winSize)`` back-to-back, non-overlapping windows.
            Any other value is reported through ``HandleError.exit``.

    Returns:
        A ``set`` containing the created windows.
    """
    c1 = matrix.getCol(0)
    c2 = matrix.getCol(1)
    rows = matrix.rows

    # The window type is compared by value. An identity test ('is') only
    # works when both strings happen to be the same interned object, so an
    # equal string built at run time would be rejected.
    if winType == 'REGULAR':
        # NOTE(review): if getSubColumn's end index is inclusive, the last
        # full window starts at rows - winSize, so this bound appears to skip
        # the final two windows. Kept as in the original -- confirm intent.
        starts = range(rows - winSize - 1)
    elif winType == 'SLIDING':
        numWins = int(math.floor(float(rows) / winSize))
        starts = [i * winSize for i in range(numWins)]
    else:
        HandleError.exit("In createHyperSphere: incorrect window type")
        starts = []

    s = set()
    for x in starts:
        y = x + winSize - 1
        s.add(Window(c1.getSubColumn(x, y), c2.getSubColumn(x, y)))
    return s
def getMetric(options):
    """Return the 1-based column number of the requested metric.

    The first line of the file named by ``options.NFILE`` is read as a
    comma-separated header. Its first field is skipped and the remaining
    fields are searched for ``options.METRIC``; the first match wins.

    Args:
        options: object exposing ``METRIC`` (metric name or ``None``) and
            ``NFILE`` (path of the normal-traces file).

    Returns:
        The position of the metric in the header, counting the skipped first
        field as column 0 (so the first metric is 1). ``''`` is returned only
        if ``HandleError.exit`` returns after an unknown metric is reported.
    """
    if options.METRIC is None:
        HandleError.exit('No abnormal metric is given.\nUse -h option for help.')
    metric_name = options.METRIC

    # Find metric number in the file. The context manager closes the file
    # even if reading fails.
    with open(options.NFILE, 'r') as trace_file:
        # Strip only line terminators: blindly dropping the last character
        # would truncate the last metric name when the header line has no
        # trailing newline.
        header = trace_file.readline().rstrip('\r\n')

    metrics = header.split(',')[1:]
    for metric_id, name in enumerate(metrics, start=1):
        if metric_name == name:
            return metric_id

    HandleError.exit('Unknown metric name.')
    return ''
def normalizeAggregates(self, averageList, stdList):
    """Standardise ``self.aggr`` in place.

    Each aggregate becomes ``(value - average) / std`` using the entries at
    the same position in *averageList* and *stdList*. Where the standard
    deviation is zero the aggregate is set to 0 instead.

    Args:
        averageList: one average per aggregate.
        stdList: one standard deviation per aggregate.

    A length mismatch with ``self.aggr`` is reported through
    ``HandleError.exit``.
    """
    expected = len(self.aggr)
    if len(averageList) != expected or len(stdList) != expected:
        HandleError.exit("In normalizeAggregates: incorrect list sizes")

    for position in range(len(self.aggr)):
        deviation = stdList[position]
        if deviation != 0.0:
            centred = self.aggr[position] - averageList[position]
            self.aggr[position] = centred / deviation
            continue
        # No spread at this position: nothing meaningful to scale by.
        self.aggr[position] = 0
def getMatricsFromCorrelationIndex(index, n):
    """Translate a flat pair index into the pair ``(x, y)`` with ``x < y < n``.

    Pairs are numbered in the order (0, 1), (0, 2), ..., (0, n - 1), (1, 2),
    ... and the pair whose number equals *index* is returned.

    Args:
        index: position of the wanted pair in that enumeration.
        n: number of items the pairs are drawn from.

    Returns:
        The ``(x, y)`` tuple. If no pair has that number the failure is
        reported through ``HandleError.exit``.
    """
    position = 0
    for first in range(n - 1):
        for second in range(first + 1, n):
            if index == position:
                return (first, second)
            position += 1
    HandleError.exit('Could not find correlation index')
def __init__(self, col1, col2):
    """Create a window from a label column and a numeric column.

    Args:
        col1: a ``StringColumn``, or a sequence wrapped into one.
        col2: a ``NumColumn``, or a sequence wrapped into one.

    The arguments are used as they are only when *col1* is a
    ``StringColumn`` and *col2* is a ``NumColumn``; in every other case both
    are wrapped. Columns of different sizes are reported through
    ``HandleError.exit``. The aggregates of the numeric column are stored in
    ``self.aggr``.
    """
    ready_made = isinstance(col1, StringColumn) and isinstance(col2, NumColumn)

    if ready_made:
        sizes_differ = col1.size() != col2.size()
    else:
        sizes_differ = len(col1) != len(col2)
    if sizes_differ:
        HandleError.exit("In window: cols of different sizes")

    if ready_made:
        self.colA, self.colB = col1, col2
    else:
        self.colA = StringColumn(col1)
        self.colB = NumColumn(col2)

    self.aggr = self.colB.getAggregates()
def removeColumnsButKeep(self, list=[]):
    """Drop every column whose index is not in *list*.

    Args:
        list: indices of the columns to keep. An empty value leaves the
            object untouched.

    The surviving columns keep their original relative order and
    ``self.cols`` is updated to the new column count. An index greater than
    the last column index is reported through ``HandleError.exit``.
    """
    if not len(list):
        return

    last_index = self.cols - 1
    for wanted in list:
        if wanted > last_index:
            HandleError.exit("Incorrect column index: " + str(wanted))

    kept = [self.listOfCols[position]
            for position in range(self.cols)
            if position in list]
    self.listOfCols = kept
    self.cols = len(kept)
def findAnomalousPoints(windowsList, method):
    """Count how often each name occurs across the windows' observations.

    For every window, ``getUniqueObservations()`` is called and each
    observation is reduced to a name:

    * an observation without ``'-'`` is used unchanged;
    * otherwise the text between the first and second ``'-'`` is taken
      (only Java applications are split by ``'-'``), and for
      ``'CLASSNAME_ONLY'`` it is further cut at the first ``'$'``.

    Args:
        windowsList: iterable of objects exposing ``getUniqueObservations()``.
        method: ``'CLASSNAME_ONLY'`` or ``'CLASSNAME_AND_METHOD'``; any other
            value is reported through ``HandleError.exit`` when the first
            observation is processed.

    Returns:
        A list of ``(name, count)`` tuples ordered by descending count.
    """
    occurrenceNumber = {}
    for win in windowsList:
        for o in win.getUniqueObservations():
            # Parse observation
            if method == 'CLASSNAME_ONLY':
                if '-' in o:
                    name = o.split('-')[1].split('$')[0]
                else:
                    name = o
            elif method == 'CLASSNAME_AND_METHOD':
                if '-' in o:
                    name = o.split('-')[1]
                else:
                    name = o
            else:
                HandleError.exit('in findAnomalousPoints: unknown method')
            occurrenceNumber[name] = occurrenceNumber.get(name, 0) + 1

    # Sort ascending and then reverse (rather than sorting with
    # reverse=True) so that names with equal counts keep the same relative
    # order as before.
    ranked = sorted(occurrenceNumber, key=occurrenceNumber.get)
    ranked.reverse()
    return [(name, occurrenceNumber[name]) for name in ranked]
def getMode(options):
    """Return the operational mode selected on the command line.

    Args:
        options: object exposing the flags ``SELECT_METRICS`` and
            ``SELECT_REGIONS``.

    Returns:
        ``'SELECT_METRICS'`` or ``'SELECT_REGIONS'``.

    Selecting both modes, or neither, is reported through
    ``HandleError.exit``.
    """
    metrics_flag = options.SELECT_METRICS
    regions_flag = options.SELECT_REGIONS

    # Can only specify one operational mode
    both_selected = metrics_flag is True and regions_flag is True
    none_selected = metrics_flag is False and regions_flag is False
    if both_selected:
        HandleError.exit('Cannot use these options together:\n --select-metrics & --select-regions. \nUse -h option for help.')
    if none_selected:
        HandleError.exit('Please use one of these options:\n--select-metrics OR --select-regions. \nUse -h option for help.')

    for flag, label in ((metrics_flag, 'SELECT_METRICS'),
                        (regions_flag, 'SELECT_REGIONS')):
        if flag is True:
            mode = label
            break
    return mode
def normalize(matrix):
    """Rescale the second column of *matrix* to percentages of its maximum.

    Args:
        matrix: a ``Matrix`` with at most two columns.

    Returns:
        A new two-column ``Matrix``: the first column is the first column of
        *matrix*, the second holds ``value / maximum * 100`` for every row,
        where ``maximum`` is ``matrix.max(1)``. When the maximum is zero the
        second column is filled with ``0.0``.

    Errors are reported through ``HandleError.exit``.
    """
    if not isinstance(matrix, Matrix):
        HandleError.exit("In Histogram.normalize: incorrect type: "
                         + str(type(matrix)))
    if matrix.cols > 2:
        HandleError.exit("In Histogram.normalize: matrix cols > 2")

    maximum = float(matrix.max(1))
    v = []
    for r in range(matrix.rows):
        value = matrix.getRow(r)[1]
        if maximum != 0.0:
            v.append(value / maximum * 100)
        else:
            # A zero maximum would otherwise raise ZeroDivisionError; there
            # is no scale to express the values against, so report 0.
            v.append(0.0)

    ret = Matrix(matrix.rows, 2)
    ret.addColumn(matrix.getCol(0))
    ret.addColumn(v)
    return ret
def calculateCorrelation(listX, listY):
    """Return the Pearson correlation coefficient of two equal-length lists.

    The coefficient is the sum of the products of the standardised values
    divided by ``n - 1``. The standard deviations use the matching ``n - 1``
    denominator (``ddof=1``), so the result stays within [-1, 1]; mixing the
    population standard deviation with the ``n - 1`` divisor inflates every
    coefficient by ``n / (n - 1)``.

    Args:
        listX: first sequence of numbers.
        listY: second sequence of numbers, same length as *listX*.

    Returns:
        The coefficient as a float, or 0 when it is undefined (fewer than
        two samples, a constant input, or a NaN result).

    A length mismatch is reported through ``HandleError.exit``.
    """
    if len(listX) != len(listY):
        HandleError.exit("In calculateCorrelation: lists of different sizes")

    n = len(listX)
    if n < 2:
        # A correlation needs at least two samples.
        return 0

    x = numpy.asarray(listX, dtype=float)
    y = numpy.asarray(listY, dtype=float)
    std_x = numpy.std(x, ddof=1)
    std_y = numpy.std(y, ddof=1)
    if std_x == 0 or std_y == 0:
        # A constant series has no variation to correlate with.
        return 0

    z_x = (x - numpy.average(x)) / std_x
    z_y = (y - numpy.average(y)) / std_y
    ret = float(numpy.dot(z_x, z_y)) / (n - 1)
    if math.isnan(ret):
        ret = 0
    return ret
def addColumn(self, col):
    """Append a column to the matrix.

    Args:
        col: either a ``Column`` instance, which is appended as it is, or a
            sequence of values. A sequence must have ``self.rows`` entries
            and must not exceed the ``self.cols`` column limit; it is
            wrapped in a ``StringColumn`` when it is the first column and in
            a ``NumColumn`` otherwise.

    Size violations are reported through ``HandleError.exit``.
    """
    # Ready-made columns are stored without any further checks.
    if isinstance(col, Column):
        self.listOfCols.append(col)
        return

    # From here on the argument is treated as a plain sequence.
    if len(col) != self.rows:
        HandleError.exit("Invalid size of column")
    if len(self.listOfCols) + 1 > self.cols:
        HandleError.exit("Too many columns")

    is_first_column = len(self.listOfCols) == 0
    wrapped = StringColumn(col) if is_first_column else NumColumn(col)
    self.listOfCols.append(wrapped)
def getMode(options):
    """Return the single operational mode selected on the command line.

    Args:
        options: object exposing the flags ``SELECT_METRICS``,
            ``SELECT_REGIONS`` and ``SELECT_CLASSNAME``.

    Returns:
        ``'SELECT_METRICS'``, ``'SELECT_REGIONS'`` or ``'SELECT_CLASSNAME'``.

    Selecting more than one mode, or none at all, is reported through
    ``HandleError.exit``.
    """
    select_metrics = options.SELECT_METRICS
    select_regions = options.SELECT_REGIONS
    select_classname = options.SELECT_CLASSNAME
    flags = (select_metrics, select_regions, select_classname)

    # Can only specify one operational mode. Counting the enabled flags
    # covers every pairing, including --select-regions together with
    # --select-classname, which a pairwise test against --select-metrics
    # alone lets through.
    enabled = sum(1 for flag in flags if flag is True)
    if enabled > 1:
        HandleError.exit('Cannot use these options together:\n --select-metrics & --select-regions & --select-classname. \nUse -h option for help.')
    if all(flag is False for flag in flags):
        HandleError.exit('Please use one of these options:\n--select-metrics OR --select-regions OR --select-classname. \nUse -h option for help.')

    if select_metrics is True:
        mode = 'SELECT_METRICS'
    elif select_regions is True:
        mode = 'SELECT_REGIONS'
    elif select_classname is True:
        mode = 'SELECT_CLASSNAME'
    return mode
def histo(matrix):
    """Build a histogram by summing column 1 for every distinct key in column 0.

    Args:
        matrix: a ``Matrix`` with at most two columns.

    Returns:
        A new two-column ``Matrix`` with one row per distinct key: the key
        in the first column and the accumulated value in the second.

    Errors are reported through ``HandleError.exit``.
    """
    if not isinstance(matrix, Matrix):
        HandleError.exit("In Histogram.histo: incorrect type: "
                         + str(type(matrix)))
    if matrix.cols > 2:
        HandleError.exit("In Histogram.histo: matrix cols > 2")

    events = {}
    for r in range(matrix.rows):
        row = matrix.getRow(r)
        key = row[0]
        # Membership is tested on the dict itself: a hash lookup instead of
        # scanning a freshly built key list on every row.
        if key in events:
            events[key] = events[key] + row[1]
        else:
            events[key] = row[1]

    # Freeze the key order once so keys and totals are guaranteed to line up,
    # and pass plain lists (not dict views) to addColumn.
    keys = list(events)
    m = Matrix(len(keys), 2)
    m.addColumn(keys)
    m.addColumn([events[k] for k in keys])
    return m
def aggregatesDistance(self, win2):
    """Return the distance between the aggregates of *win2* and this window.

    The value is whatever ``Window.listDistance`` computes for
    ``win2.aggr`` and ``self.aggr``, passed in that order.

    Args:
        win2: the other ``Window``; anything else is reported through
            ``HandleError.exit``.
    """
    other_is_window = isinstance(win2, Window)
    if not other_is_window:
        HandleError.exit("In aggregateDistance: argument is not a window")

    distance = Window.listDistance(win2.aggr, self.aggr)
    return distance
def getPrintAbnormal(options):
    """Return ``options.PRINT_ABNORMAL``.

    A missing value (``None``) is reported through ``HandleError.exit``.
    """
    preference = options.PRINT_ABNORMAL
    if preference is None:
        HandleError.exit('No print preference given.\nUse -h option for help.')
    return preference
def getClassName(options):
    """Return ``options.CLASSNAME``.

    A missing value (``None``) is reported through ``HandleError.exit``.
    """
    classname = options.CLASSNAME
    if classname is None:
        HandleError.exit('No classname given.\nUse -h option for help.')
    return classname
def getNormalFile(options):
    """Return ``options.NFILE``, the normal-traces file.

    A missing value (``None``) is reported through ``HandleError.exit``.
    """
    normal_file = options.NFILE
    if normal_file is None:
        HandleError.exit('No normal-traces file given.\nUse -h option for help.')
    return normal_file